"""Module to summarize the given text using AzureChatOpenAI model."""

import logging
import os

from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain.schema.document import Document
from langchain_core.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv()
os.environ["AZURE_OPENAI_API_VERSION"] = "2023-06-01-preview"
os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "gpt-35-turbo"
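# Assumption: the .env file loaded above supplies the remaining Azure OpenAI
# settings (e.g. AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY) that
# AzureChatOpenAI reads from the environment.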


def summarize(text):
    """
    Summarizes the given text using AzureChatOpenAI model.

    Args:
        text (str): The text to be summarized.

    Returns:
        str: The concise summary of the text, limited to 70 words or less.

    Raises:
        None
    """
    llm = AzureChatOpenAI(
        openai_api_version="2023-06-01-preview",
        azure_deployment="gpt-4o-mini",
    )

prompt_template = """ You're task is to create a concise summary of the following text, |
|
meant as a preview of the article when you hover over it, |
|
keep it less than 70 words, do not insert information not found in the text, |
|
audience is potential reader who wants to know what is the article generally about: |
|
"{text}" |
|
CONCISE SUMMARY::""" |
|
prompt = PromptTemplate.from_template(prompt_template) |
|
|
|
    refine_template = (
        "Your job is to produce a final summary.\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary and keep it to less than 70 words. "
        "Do not insert information not found in the text. "
        "The audience is a potential reader who wants to know what the article is generally about. "
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="output_text",
    )

    # Split the input into overlapping chunks so each fits the model's context window.
    document = Document(page_content=text)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3500,
                                                   chunk_overlap=100)
    split_docs = text_splitter.split_documents([document])

    # Run the refine chain over the chunks and keep only the final summary text.
    output_text = chain.invoke({"input_documents": split_docs})["output_text"]
    logging.info(output_text)
    return output_text
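

# Minimal usage sketch, assuming the Azure OpenAI credentials above are
# configured; "sample_article.txt" is a hypothetical local file used only for
# illustration.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    with open("sample_article.txt", encoding="utf-8") as f:
        article = f.read()
    print(summarize(article))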
|