from dotenv import load_dotenv
import os
from openai import OpenAI
from models import FormDetails
from prompts import system_prompt_template, prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse

# LlamaParse runs an asyncio loop internally; nest_asyncio lets it work when an
# event loop is already running (e.g. inside notebooks / web frameworks).
nest_asyncio.apply()
load_dotenv()

# Parser used by the "llama_llm_*" model variants to OCR the form image.
parser = LlamaParse(
    result_type="markdown"  # "markdown" and "text" are available
)

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


def encode_image(image):
    """Serialize a PIL image to a base64-encoded string.

    The image is written to an in-memory buffer in its original format.
    Images created in memory have ``image.format == None``, which would make
    ``save`` raise, so we fall back to PNG (lossless) in that case.

    Args:
        image: PIL.Image.Image to encode.

    Returns:
        str: base64-encoded image bytes (ASCII-safe UTF-8 text).
    """
    buffer = BytesIO()
    image.save(buffer, format=image.format or "PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def get_text(image, filename, model, fields="ALL"):
    """Extract form field names/values from an image via an LLM.

    Dispatches on the ``model`` name prefix:
      * ``gpt*``        — OpenAI vision + structured output (``FormDetails``).
      * ``claude*``     — Anthropic Messages API with an image content block.
      * ``llama_llm_o`` — LlamaParse OCR, then gpt-4o-mini structured output.
      * ``llama_llm_d`` — LlamaParse OCR, then DeepSeek in JSON mode.

    Args:
        image: PIL.Image.Image of the form to read.
        filename: original file name, interpolated into the system prompt.
        model: model identifier selecting the provider branch (see above).
        fields: which fields to extract; "ALL" (default) or a specific list.

    Returns:
        str: the model's response text (JSON describing the form fields).

    Raises:
        ValueError: if ``model`` matches no supported provider/variant.
    """
    base64_image = encode_image(image)

    # Declare the media type that matches what encode_image actually produced
    # (the original hardcoded image/jpeg for OpenAI and image/png for Claude,
    # which disagreed with the real payload whenever the input was another
    # format).
    fmt = (image.format or "PNG").upper()
    media_type = "image/jpeg" if fmt in ("JPG", "JPEG") else f"image/{fmt.lower()}"

    if model.startswith("gpt"):
        print("gpt")
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt_template.format(filename, FormDetails.schema_json())
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{media_type};base64,{base64_image}"},
                        },
                    ],
                }
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        return response.choices[0].message.content

    if model.startswith("claude"):
        print("claude")
        client = anthropic.Anthropic()
        # Anthropic has no structured-output parse helper here, so the expected
        # JSON shape is appended to the system prompt instead.
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt_template.format(filename, FormDetails.schema_json())
            + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": base64_image,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        }
                    ],
                }
            ],
            temperature=0.0,
        )
        return message.content[0].text

    if model.startswith("llama_llm"):
        print("llama_llm")
        # JPEG cannot store alpha or palette data; convert those modes to RGB
        # before saving (the original handled only RGBA).
        if image.mode in ("RGBA", "LA", "P"):
            image = image.convert("RGB")
        # LlamaParse reads from disk, so persist the image to a temp file name.
        image.save("image.jpg")
        text = parser.load_data("image.jpg")

        if model == "llama_llm_o":
            client = OpenAI(api_key=OPENAI_API_KEY)
            response = client.beta.chat.completions.parse(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(filename, FormDetails.schema_json())
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}"
                    }
                ],
                response_format=FormDetails,
                temperature=0.0,
            )
            return response.choices[0].message.content

        if model == "llama_llm_d":
            # DeepSeek exposes an OpenAI-compatible endpoint; JSON mode is
            # requested via response_format={'type': 'json_object'}.
            print("deepseek")
            client = OpenAI(
                api_key=os.getenv('DEEPSEEK_API_KEY'),
                base_url=os.getenv('DEEPSEEK_API_URL'),
            )
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {
                        "role": "system",
                        "content": system_prompt_template.format(filename, FormDetails.schema_json())
                    },
                    {
                        "role": "user",
                        "content": f"{prompt.format(fields)} \n Knowledge Base {text}"
                    }
                ],
                stream=False,
                response_format={'type': 'json_object'}
            )
            return response.choices[0].message.content

    # Original code fell through here with `response` unassigned, raising an
    # opaque UnboundLocalError; fail loudly with the actual problem instead.
    raise ValueError(f"Unsupported model: {model}")