# documaticai / utils.py
# Author: rockerritesh — "Update utils.py" (commit 5535935, verified)
from dotenv import load_dotenv
import os
# import openai
from openai import OpenAI
from models import FormDetails, TranslateDetails
from prompts import system_prompt_template, prompt, translate_system_prompt, translation_prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse
nest_asyncio.apply()
load_dotenv()
# set up parser
parser = LlamaParse(
result_type="markdown" # "markdown" and "text" are available
)
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
def encode_image(image):
    """Serialize a PIL-style image to a base64-encoded ASCII string.

    Args:
        image: Image object exposing ``.save(buffer, format=...)`` and
            ``.format`` (a PIL ``Image`` fits this contract).

    Returns:
        Base64 string of the image bytes, encoded in the image's own format.
        Falls back to PNG when ``.format`` is ``None`` — PIL only sets
        ``.format`` on images loaded from files, so in-memory images
        (``Image.new``, results of ``convert``) would otherwise crash here.
    """
    buffer = BytesIO()
    # PNG is a lossless, always-available fallback format.
    image.save(buffer, format=image.format or "PNG")
    image_bytes = buffer.getvalue()
    return base64.b64encode(image_bytes).decode("utf-8")
def get_text(image, filename, model, fields="ALL"):
    """Extract form field/value pairs from a form image via an LLM backend.

    Routing is by model-name prefix:
      * ``gpt*``        -> OpenAI vision with structured ``FormDetails`` output.
      * ``claude*``     -> Anthropic vision; schema described inside the prompt.
      * ``llama_llm_o`` -> LlamaParse OCR of the image, then gpt-4o-mini.
      * ``llama_llm_d`` -> LlamaParse OCR of the image, then DeepSeek chat.

    Args:
        image: PIL-style image object (supports ``.mode``, ``.convert``,
            ``.save``).
        filename: Form name interpolated into the system prompt.
        model: Backend/model identifier (see routing above).
        fields: Fields to extract; ``"ALL"`` extracts everything.

    Returns:
        The model's reply text (JSON matching the ``FormDetails`` schema for
        the structured backends).

    Raises:
        ValueError: If ``model`` matches no supported backend. (Previously an
            unknown model fell through every branch and raised
            ``UnboundLocalError`` on ``response``.)
    """
    # Getting the base64 string
    base64_image = encode_image(image)

    if model.startswith("gpt"):
        print("gpt")
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt_template.format(filename, FormDetails.schema_json()),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                },
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        response = completion.choices[0].message.content
    elif model.startswith("claude"):
        print("claude")
        # Anthropic has no structured-output parse endpoint here, so the JSON
        # schema is appended to the system prompt instead.
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt_template.format(filename, FormDetails.schema_json()) + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                # NOTE(review): media_type is fixed to PNG here while
                                # the GPT branch labels the same bytes JPEG — confirm
                                # against the actual upload formats.
                                "type": "base64",
                                "media_type": "image/png",
                                "data": base64_image,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                    ],
                }
            ],
            temperature=0.0,
        )
        response = message.content[0].text
    elif model.startswith("llama_llm"):
        print("llama_llm")
        # Ensure the image is in RGB mode (JPEG cannot store an alpha channel).
        if image.mode == "RGBA":
            image = image.convert("RGB")
        # LlamaParse consumes a file path, so persist the image first.
        image.save("image.jpg")
        # parse the image into markdown/text to use as a knowledge base
        text = parser.load_data("image.jpg")
        if model == "llama_llm_o":
            client = OpenAI(api_key=OPENAI_API_KEY)
            completion = client.beta.chat.completions.parse(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(filename, FormDetails.schema_json()),
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}",
                    },
                ],
                response_format=FormDetails,
                temperature=0.0,
            )
            response = completion.choices[0].message.content
        elif model == "llama_llm_d":
            # deepseek (OpenAI-compatible API, plain JSON-object mode)
            print("deepseek")
            client = OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'), base_url=os.getenv('DEEPSEEK_API_URL'))
            completion = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(filename, FormDetails.schema_json()),
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}",
                    },
                ],
                stream=False,
                response_format={
                    'type': 'json_object'
                },
            )
            response = completion.choices[0].message.content
        else:
            raise ValueError(f"Unsupported llama_llm model: {model}")
    else:
        raise ValueError(f"Unsupported model: {model}")
    # print(response)
    return response
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with GPT-4o.

    Uses the structured-output parse endpoint constrained to the
    ``TranslateDetails`` schema and returns the raw message content
    (a JSON string matching that schema).
    """
    api_client = OpenAI(api_key=OPENAI_API_KEY)
    chat_messages = [
        {
            "role": "system",
            "content": translate_system_prompt.format(target_language, TranslateDetails.schema_json()),
        },
        {
            "role": "user",
            "content": translation_prompt.format(target_language, text),
        },
    ]
    completion = api_client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=chat_messages,
        response_format=TranslateDetails,
        temperature=0.0,
    )
    return completion.choices[0].message.content
def chat_text(system, query, history):
    """Answer ``query`` with GPT-4o, grounded in the given knowledge base.

    Args:
        system: Knowledge-base text the answer must be based on.
        query: The user's question.
        history: Prior conversation, folded into the system prompt.

    Returns:
        The full message object from the first choice (not just its text).
    """
    llm = OpenAI()
    grounding_message = {
        "role": "system",
        "content": f"Here is the text you have to answer based on this. Knowledge Base:-{system}. \n History: {history}",
    }
    user_message = {"role": "user", "content": query}
    result = llm.chat.completions.create(
        model="gpt-4o",
        messages=[grounding_message, user_message],
    )
    return result.choices[0].message