Spaces:

rockerritesh
/

documaticai

Sleeping

File size: 5,426 Bytes

fc9f4fe

from dotenv import load_dotenv
import os
# import openai
from openai import OpenAI
from models import FormDetails
from prompts import system_prompt_template, prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse

# Patch asyncio via nest_asyncio before anything else runs.
# NOTE(review): presumably needed so the LlamaParse client can be driven from
# synchronous code while an event loop is already running — confirm.
nest_asyncio.apply()
# Load variables from a .env file (if any) so the os.getenv() lookups in this
# module can see them.
load_dotenv()

# Shared LlamaParse client used by get_text() for the "llama_llm*" models.
# No credentials are passed here.
# NOTE(review): presumably the key comes from the environment — confirm.
parser = LlamaParse(
    result_type="markdown"  # "markdown" and "text" are available
)

# API key handed to every OpenAI client built in this module; None when the
# environment variable is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


# Function to encode the image
def encode_image(image):
    """Serialise *image* into bytes and return them base64-encoded.

    Args:
        image: Object exposing a ``format`` attribute and a
            ``save(fp, format=...)`` method (presumably a PIL image —
            confirm against callers).

    Returns:
        The encoded image bytes as a base64 ``str`` (UTF-8 decoded).
    """
    # Images created or transformed in memory carry no format. Saving to a
    # nameless buffer with format=None would fail because there is no file
    # extension to infer the format from, so fall back to PNG.
    image_format = getattr(image, "format", None) or "PNG"
    buffer = BytesIO()
    image.save(buffer, format=image_format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

# Image modes Pillow's JPEG writer accepts; any other mode (RGBA, P, LA, ...)
# is converted to RGB before the image is saved as a .jpg.
_JPEG_SAFE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

# Model names served by the LlamaParse + text-LLM pipeline.
_LLAMA_MODELS = ("llama_llm_o", "llama_llm_d")


def _image_media_type(image):
    """Return the MIME type for the format *image* reports, defaulting to PNG."""
    image_format = getattr(image, "format", None) or "PNG"
    return f"image/{image_format.lower()}"


def _parse_image_with_llama(image):
    """Save *image* as a temporary JPEG and return ``parser.load_data`` output."""
    import tempfile  # local import: only this code path needs it

    if image.mode not in _JPEG_SAFE_MODES:
        image = image.convert("RGB")
    # A private temporary directory keeps concurrent calls from overwriting
    # each other's file and guarantees the file is removed afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "image.jpg")
        image.save(image_path)
        return parser.load_data(image_path)


def get_text(image, filename, model, fields="ALL"):
    """Extract form details from *image* with the backend selected by *model*.

    Backends:
        * ``gpt*``: OpenAI vision call with ``FormDetails`` as the structured
          response format.
        * ``claude*``: Anthropic Messages call with the image attached.
        * ``llama_llm_o``: LlamaParse output sent to ``gpt-4o-mini``.
        * ``llama_llm_d``: LlamaParse output sent to ``deepseek-chat`` in
          JSON-object mode.

    Args:
        image: Image to read (presumably a PIL image — it must provide
            ``format``, ``mode``, ``save`` and ``convert``).
        filename: Name inserted into the system prompt template.
        model: Backend/model name, see above.
        fields: Value inserted into the user prompt; defaults to ``"ALL"``.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If *model* is not one of the supported names. Errors
            raised by the provider SDKs are not caught here.
    """
    uses_gpt = model.startswith("gpt")
    uses_claude = model.startswith("claude")
    # Reject unknown names before doing any work. This used to surface as an
    # UnboundLocalError at the final return, after a parse call had already
    # been made for unrecognised "llama_llm*" names.
    if not (uses_gpt or uses_claude or model in _LLAMA_MODELS):
        raise ValueError(
            f"Unsupported model {model!r}: expected a name starting with "
            f"'gpt' or 'claude', or one of {_LLAMA_MODELS}"
        )

    system_prompt = system_prompt_template.format(filename, FormDetails.schema_json())
    user_prompt = prompt.format(fields)

    if uses_gpt:
        print("gpt")
        base64_image = encode_image(image)
        # Label the payload with the format the image was actually encoded in
        # rather than a hard-coded type.
        media_type = _image_media_type(image)
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{media_type};base64,{base64_image}"},
                        },
                    ],
                },
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        return completion.choices[0].message.content

    if uses_claude:
        print("claude")
        base64_image = encode_image(image)
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                # Must describe the real encoding of `data`.
                                "media_type": _image_media_type(image),
                                "data": base64_image,
                            },
                        },
                        {"type": "text", "text": user_prompt},
                    ],
                }
            ],
            temperature=0.0,
        )
        return message.content[0].text

    # Remaining case: one of _LLAMA_MODELS. The image is parsed first and the
    # parser output is passed to a text-only chat model.
    print("llama_llm")
    text = _parse_image_with_llama(image)
    # NOTE(review): `text` is interpolated as-is, i.e. the repr of whatever
    # parser.load_data returns — confirm that is the intended prompt content.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"{user_prompt} \n Knowledge Base {text}"},
    ]
    if model == "llama_llm_o":
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=messages,
            response_format=FormDetails,
            temperature=0.0,
        )
    else:
        # llama_llm_d: DeepSeek through its OpenAI-compatible endpoint.
        print("deepseek")
        client = OpenAI(
            api_key=os.getenv('DEEPSEEK_API_KEY'),
            base_url=os.getenv('DEEPSEEK_API_URL'),
        )
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=messages,
            stream=False,
            response_format={'type': 'json_object'},
        )
    return completion.choices[0].message.content