"""Helpers for extracting form fields from images, translating text, and chat.

Routes extraction requests to OpenAI (GPT), Anthropic (Claude), or a
LlamaParse-preprocessed pipeline (OpenAI or DeepSeek) based on the model name
prefix.  Expects API keys in the environment (loaded via ``dotenv``).
"""

import base64
import os
from io import BytesIO

import anthropic
import nest_asyncio
from dotenv import load_dotenv
from llama_parse import LlamaParse
from openai import OpenAI

from models import FormDetails, TranslateDetails
from prompts import (
    prompt,
    system_prompt_template,
    translate_system_prompt,
    translation_prompt,
)

# LlamaParse runs its own asyncio loop; nest_asyncio allows that inside an
# already-running loop (e.g. notebooks / web frameworks).
nest_asyncio.apply()
load_dotenv()

# Shared LlamaParse client.  "markdown" and "text" result types are available.
parser = LlamaParse(
    result_type="markdown"
)

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


def encode_image(image):
    """Return a PIL image encoded as a base64 UTF-8 string.

    Args:
        image: a ``PIL.Image.Image`` instance.

    Returns:
        The image bytes, base64-encoded and decoded to ``str``.
    """
    buffer = BytesIO()
    # Save the image to the buffer in its original format.  ``.format`` is
    # None for images created in memory (PIL only sets it for images loaded
    # from a file), so fall back to PNG in that case.
    image.save(buffer, format=image.format or "PNG")
    image_bytes = buffer.getvalue()
    return base64.b64encode(image_bytes).decode("utf-8")


def get_text(image, filename, model, fields="ALL"):
    """Extract form fields and values from ``image`` using the chosen backend.

    Args:
        image: PIL image of the form page.
        filename: source file name, interpolated into the system prompt.
        model: model identifier; dispatched on its prefix — ``gpt*`` (OpenAI
            structured output), ``claude*`` (Anthropic vision), or
            ``llama_llm_o`` / ``llama_llm_d`` (LlamaParse text + GPT-4o-mini
            or DeepSeek).
        fields: which fields to extract; "ALL" requests every field.

    Returns:
        The model's response text (JSON conforming to ``FormDetails`` for the
        structured backends).

    Raises:
        ValueError: if ``model`` matches no supported backend.  (Previously
            this fell through and raised ``NameError`` on the final return.)
    """
    # Getting the base64 string
    base64_image = encode_image(image)

    if model.startswith("gpt"):
        print("gpt")
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt_template.format(
                        filename, FormDetails.schema_json()
                    ),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                    ],
                },
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        response = response.choices[0].message.content

    elif model.startswith("claude"):
        print("claude")
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            # Claude has no structured-output parse API, so the expected JSON
            # shape is appended to the system prompt instead.
            system=system_prompt_template.format(
                filename, FormDetails.schema_json()
            )
            + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                # NOTE(review): media_type is hard-coded to
                                # PNG while the GPT branch labels the same
                                # bytes as JPEG — confirm the actual upload
                                # format against callers.
                                "media_type": "image/png",
                                "data": base64_image,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                    ],
                }
            ],
            temperature=0.0,
        )
        response = message.content[0].text

    elif model.startswith("llama_llm"):
        print("llama_llm")
        # Ensure the image is in RGB mode (JPEG cannot store an alpha channel).
        if image.mode == "RGBA":
            image = image.convert("RGB")
        # LlamaParse reads from disk, so persist the image first.
        image.save("image.jpg")
        text = parser.load_data("image.jpg")

        if model == "llama_llm_o":
            client = OpenAI(api_key=OPENAI_API_KEY)
            response = client.beta.chat.completions.parse(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(
                            filename, FormDetails.schema_json()
                        ),
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}",
                    },
                ],
                response_format=FormDetails,
                temperature=0.0,
            )
            response = response.choices[0].message.content
        elif model == "llama_llm_d":  # deepseek
            print("deepseek")
            client = OpenAI(
                api_key=os.getenv('DEEPSEEK_API_KEY'),
                base_url=os.getenv('DEEPSEEK_API_URL'),
            )
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(
                            filename, FormDetails.schema_json()
                        ),
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}",
                    },
                ],
                stream=False,
                # DeepSeek supports JSON-object mode but not schema parsing.
                response_format={'type': 'json_object'},
            )
            response = response.choices[0].message.content
        else:
            # Bug fix: an unrecognized llama_llm variant previously fell
            # through to `return response` and raised NameError.
            raise ValueError(f"Unsupported llama_llm variant: {model}")
    else:
        # Bug fix: an unrecognized model previously raised NameError here.
        raise ValueError(f"Unsupported model: {model}")

    return response


def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with GPT-4o.

    Args:
        text: source text to translate.
        target_language: language name interpolated into the prompts.

    Returns:
        JSON string conforming to ``TranslateDetails``.
    """
    client = OpenAI(api_key=OPENAI_API_KEY)
    response = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": translate_system_prompt.format(
                    target_language, TranslateDetails.schema_json()
                ),
            },
            {
                "role": "user",
                "content": translation_prompt.format(target_language, text),
            },
        ],
        response_format=TranslateDetails,
        temperature=0.0,
    )
    response = response.choices[0].message.content
    return response


def chat_text(system, query, history):
    """Answer ``query`` against a knowledge base and conversation history.

    Args:
        system: knowledge-base text the answer must be grounded in.
        query: the user's question.
        history: prior conversation, interpolated into the system prompt.

    Returns:
        The full message object from the completion (not just ``.content``) —
        NOTE(review): intentional? Other helpers here return the text.
    """
    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": f"Here is the text you have to answer based on this. Knowledge Base:-{system}. \n History: {history}",
            },
            {
                "role": "user",
                "content": query,
            },
        ],
    )
    return completion.choices[0].message