# documaticai / utils.py
# Author: rockerritesh — "Update utils.py" (commit 5535935, verified)
from dotenv import load_dotenv
import os
# import openai
from openai import OpenAI
from models import FormDetails, TranslateDetails
from prompts import system_prompt_template, prompt, translate_system_prompt, translation_prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse
nest_asyncio.apply()
load_dotenv()
# set up parser
parser = LlamaParse(
result_type="markdown" # "markdown" and "text" are available
)
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
def encode_image(image):
    """Serialize a PIL-style image to a base64-encoded ASCII string.

    Args:
        image: Image object exposing ``.save(buffer, format=...)`` and
            ``.format`` (a PIL ``Image`` fits this contract).

    Returns:
        Base64 string of the image bytes, encoded in the image's own format.
        Falls back to PNG when ``.format`` is ``None`` — PIL only sets
        ``.format`` on images loaded from files, so in-memory images
        (``Image.new``, results of ``convert``) would otherwise crash here.
    """
    buffer = BytesIO()
    # PNG is a lossless, always-available fallback format.
    image.save(buffer, format=image.format or "PNG")
    image_bytes = buffer.getvalue()
    return base64.b64encode(image_bytes).decode("utf-8")
def get_text(image, filename, model, fields="ALL"):
    """Extract form field/value pairs from a form image via an LLM backend.

    Routing is by model-name prefix:
      * ``gpt*``        -> OpenAI vision with structured ``FormDetails`` output.
      * ``claude*``     -> Anthropic vision; schema described inside the prompt.
      * ``llama_llm_o`` -> LlamaParse OCR of the image, then gpt-4o-mini.
      * ``llama_llm_d`` -> LlamaParse OCR of the image, then DeepSeek chat.

    Args:
        image: PIL-style image object (supports ``.mode``, ``.convert``,
            ``.save``).
        filename: Form name interpolated into the system prompt.
        model: Backend/model identifier (see routing above).
        fields: Fields to extract; ``"ALL"`` extracts everything.

    Returns:
        The model's reply text (JSON matching the ``FormDetails`` schema for
        the structured backends).

    Raises:
        ValueError: If ``model`` matches no supported backend. (Previously an
            unknown model fell through every branch and raised
            ``UnboundLocalError`` on ``response``.)
    """
    # Getting the base64 string
    base64_image = encode_image(image)

    if model.startswith("gpt"):
        print("gpt")
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt_template.format(filename, FormDetails.schema_json()),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                },
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        response = completion.choices[0].message.content
    elif model.startswith("claude"):
        print("claude")
        # Anthropic has no structured-output parse endpoint here, so the JSON
        # schema is appended to the system prompt instead.
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt_template.format(filename, FormDetails.schema_json()) + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                # NOTE(review): media_type is fixed to PNG here while
                                # the GPT branch labels the same bytes JPEG — confirm
                                # against the actual upload formats.
                                "type": "base64",
                                "media_type": "image/png",
                                "data": base64_image,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                    ],
                }
            ],
            temperature=0.0,
        )
        response = message.content[0].text
    elif model.startswith("llama_llm"):
        print("llama_llm")
        # Ensure the image is in RGB mode (JPEG cannot store an alpha channel).
        if image.mode == "RGBA":
            image = image.convert("RGB")
        # LlamaParse consumes a file path, so persist the image first.
        image.save("image.jpg")
        # parse the image into markdown/text to use as a knowledge base
        text = parser.load_data("image.jpg")
        if model == "llama_llm_o":
            client = OpenAI(api_key=OPENAI_API_KEY)
            completion = client.beta.chat.completions.parse(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(filename, FormDetails.schema_json()),
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}",
                    },
                ],
                response_format=FormDetails,
                temperature=0.0,
            )
            response = completion.choices[0].message.content
        elif model == "llama_llm_d":
            # deepseek (OpenAI-compatible API, plain JSON-object mode)
            print("deepseek")
            client = OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'), base_url=os.getenv('DEEPSEEK_API_URL'))
            completion = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(filename, FormDetails.schema_json()),
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}",
                    },
                ],
                stream=False,
                response_format={
                    'type': 'json_object'
                },
            )
            response = completion.choices[0].message.content
        else:
            raise ValueError(f"Unsupported llama_llm model: {model}")
    else:
        raise ValueError(f"Unsupported model: {model}")
    # print(response)
    return response
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with GPT-4o.

    Uses the structured-output parse endpoint constrained to the
    ``TranslateDetails`` schema and returns the raw message content
    (a JSON string matching that schema).
    """
    api_client = OpenAI(api_key=OPENAI_API_KEY)
    chat_messages = [
        {
            "role": "system",
            "content": translate_system_prompt.format(target_language, TranslateDetails.schema_json()),
        },
        {
            "role": "user",
            "content": translation_prompt.format(target_language, text),
        },
    ]
    completion = api_client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=chat_messages,
        response_format=TranslateDetails,
        temperature=0.0,
    )
    return completion.choices[0].message.content
def chat_text(system, query, history):
    """Answer ``query`` with GPT-4o, grounded in the given knowledge base.

    Args:
        system: Knowledge-base text the answer must be based on.
        query: The user's question.
        history: Prior conversation, folded into the system prompt.

    Returns:
        The full message object from the first choice (not just its text).
    """
    llm = OpenAI()
    grounding_message = {
        "role": "system",
        "content": f"Here is the text you have to answer based on this. Knowledge Base:-{system}. \n History: {history}",
    }
    user_message = {"role": "user", "content": query}
    result = llm.chat.completions.create(
        model="gpt-4o",
        messages=[grounding_message, user_message],
    )
    return result.choices[0].message