Spaces:

Oxbridge-Economics
/

Data-Collection-China

Sleeping

App Files Files Community

Data-Collection-China / controllers /summarizer.py

gavinzli

Update Azure deployment to gpt-4o-mini in summarizer function

423a637 4 months ago

raw

history blame contribute delete

2.74 kB

	"""Module to summarize the given text using AzureChatOpenAI model."""
	import logging
	import os

	from dotenv import load_dotenv
	from langchain.chains.summarize import load_summarize_chain
	from langchain.schema.document import Document
	from langchain_core.prompts import PromptTemplate
	from langchain_openai import AzureChatOpenAI
	from langchain_text_splitters import RecursiveCharacterTextSplitter

	load_dotenv()
	os.environ["AZURE_OPENAI_API_VERSION"] = "2023-06-01-preview"
	os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] = "gpt-35-turbo"


	def summarize(text):
	"""
	Summarizes the given text using AzureChatOpenAI model.

	Args:
	text (str): The text to be summarized.

	Returns:
	str: The concise summary of the text, limited to 70 words or less.

	Raises:
	None
	"""
	llm = AzureChatOpenAI(
	openai_api_version="2023-06-01-preview",
	azure_deployment="gpt-4o-mini",
	)

	prompt_template = """ You're task is to create a concise summary of the following text,
	meant as a preview of the article when you hover over it,
	keep it less than 70 words, do not insert information not found in the text,
	audience is potential reader who wants to know what is the article generally about:
	"{text}"
	CONCISE SUMMARY::"""
	prompt = PromptTemplate.from_template(prompt_template)

	refine_template = (
	"Your job is to produce a final summary\n"
	"We have provided an existing summary up to a certain point: {existing_answer}\n"
	"We have the opportunity to refine the existing summary"
	"(only if needed) with some more context below.\n"
	"------------\n"
	"{text}\n"
	"------------\n"
	"Given the new context, refine the original summary, keep it less than 70 words, \
	do not insert information not found in the text,"
	"Audience is potential reader who wants to know what is the article generally about"
	"If the context isn't useful, return the original summary.")
	refine_prompt = PromptTemplate.from_template(refine_template)
	chain = load_summarize_chain(
	llm=llm,
	chain_type="refine",
	question_prompt=prompt,
	refine_prompt=refine_prompt,
	return_intermediate_steps=True,
	input_key="input_documents",
	output_key="output_text",
	)
	document = Document(page_content=text)
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=3500,
	chunk_overlap=100)
	split_docs = text_splitter.split_documents([document])
	output_text = chain.invoke({'input_documents': split_docs})["output_text"]
	logging.info(output_text)
	return output_text