Spaces:

Oxbridge-Economics
/

Data-Collection-China

Sleeping

File size: 2,741 Bytes

"""Module to summarize the given text using AzureChatOpenAI model."""
import logging
import os

from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain.schema.document import Document
from langchain_core.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv()
os.environ["AZURE_OPENAI_API_VERSION"] = "2023-06-01-preview"
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "gpt-35-turbo"


def summarize(text):
    """
    Summarizes the given text using AzureChatOpenAI model.

    Args:
        text (str): The text to be summarized.

    Returns:
        str: The concise summary of the text, limited to 70 words or less.

    Raises:
        None
    """
    llm = AzureChatOpenAI(
        openai_api_version="2023-06-01-preview",
        azure_deployment="gpt-4o-mini",
    )

    prompt_template = """ You're task is to create a concise summary of the following text,
      meant as a preview of the article when you hover over it,
      keep it less than 70 words, do not insert information not found in the text,
      audience is potential reader who wants to know what is the article generally about:
      "{text}"
      CONCISE SUMMARY::"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary, keep it less than 70 words, \
            do not insert information not found in the text,"
        "Audience is potential reader who wants to know what is the article generally about"
        "If the context isn't useful, return the original summary.")
    refine_prompt = PromptTemplate.from_template(refine_template)
    chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="output_text",
    )
    document = Document(page_content=text)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3500,
                                                   chunk_overlap=100)
    split_docs = text_splitter.split_documents([document])
    output_text = chain.invoke({'input_documents': split_docs})["output_text"]
    logging.info(output_text)
    return output_text