"""Module to summarize the given text using AzureChatOpenAI model."""

import logging
import os

from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain.schema.document import Document
from langchain_core.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

logger = logging.getLogger(__name__)

# API version used both for the exported environment variable and for the
# client constructed in summarize(), so the two cannot drift apart.
_API_VERSION = "2023-06-01-preview"

# Deployment actually passed to AzureChatOpenAI by summarize().
_AZURE_DEPLOYMENT = "gpt-4o-mini"

# Settings handed to RecursiveCharacterTextSplitter.
_CHUNK_SIZE = 3500
_CHUNK_OVERLAP = 100

load_dotenv()

# These assignments run after load_dotenv() and are unconditional, so they
# replace any value the process environment or the .env file supplied.
os.environ["AZURE_OPENAI_API_VERSION"] = _API_VERSION
# NOTE(review): this value differs from the deployment summarize() really uses
# ("gpt-4o-mini"), and nothing in this module reads the variable back.
# Confirm which deployment is intended before relying on it.
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "gpt-35-turbo"

# Prompt applied to the first chunk of the text.
_SUMMARY_TEMPLATE = """
Your task is to create a concise summary of the following text, meant as a preview of the article
when you hover over it, keep it less than 70 words, do not insert information not found in the text,
audience is potential reader who wants to know what is the article generally about:

"{text}"

CONCISE SUMMARY::"""

# Prompt applied to every following chunk together with the summary so far.
# Each fragment ends with an explicit space or newline: adjacent string
# literals are concatenated verbatim, so a missing separator glues words
# together in the text sent to the model.
_REFINE_TEMPLATE = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary, keep it less than 70 words, "
    "do not insert information not found in the text. "
    "Audience is potential reader who wants to know what is the article generally about. "
    "If the context isn't useful, return the original summary."
)


def summarize(text: str) -> str:
    """
    Summarizes the given text using AzureChatOpenAI model.

    The text is split into overlapping chunks and passed through a "refine"
    summarize chain: the first chunk is summarized with the summary prompt,
    and each later chunk is used to refine the running summary.

    Args:
        text (str): The text to be summarized.

    Returns:
        str: The summary produced by the chain. The prompts ask the model to
            stay under 70 words, but the length is not enforced here. An
            empty string is returned when ``text`` is empty, ``None`` or
            whitespace-only; no model call is made in that case.

    Raises:
        Exception: Nothing is caught here, so any error raised by the
            LangChain or Azure OpenAI calls propagates to the caller.
    """
    # Nothing to summarize: skip the chain instead of handing it blank input.
    if not text or not text.strip():
        return ""

    llm = AzureChatOpenAI(
        openai_api_version=_API_VERSION,
        azure_deployment=_AZURE_DEPLOYMENT,
    )

    chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=PromptTemplate.from_template(_SUMMARY_TEMPLATE),
        refine_prompt=PromptTemplate.from_template(_REFINE_TEMPLATE),
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="output_text",
    )

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=_CHUNK_SIZE,
        chunk_overlap=_CHUNK_OVERLAP,
    )
    split_docs = text_splitter.split_documents([Document(page_content=text)])

    # Only the final summary is used; the intermediate steps the chain also
    # returns are ignored.
    output_text = chain.invoke({"input_documents": split_docs})["output_text"]
    logger.info("%s", output_text)
    return output_text