# Spaces: rockerritesh/documaticai (Sleeping) - File size: 6,717 Bytes

from dotenv import load_dotenv
import os
# import openai
from openai import OpenAI
from models import FormDetails, TranslateDetails
from prompts import system_prompt_template, prompt, translate_system_prompt, translation_prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse

# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because LlamaParse runs async code
# internally -- confirm before removing.
nest_asyncio.apply()
# Load variables from a local .env file into the process environment before
# anything below reads API keys from it.
load_dotenv()

# Set up the document parser used by get_text for the "llama_llm" models.
# No API key is passed explicitly, so LlamaParse presumably reads it from the
# environment populated above -- TODO confirm which variable it expects.
parser = LlamaParse(
    result_type="markdown"  # "markdown" and "text" are available
)

# OpenAI key read once at import time; os.getenv returns None when it is unset.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


def encode_image(image):
    """Serialize an image to a base64-encoded string.

    Args:
        image: A PIL-style image exposing ``format`` and
            ``save(fp, format=...)``.

    Returns:
        The image bytes, encoded in the image's own format, as a base64
        string decoded to UTF-8 text.
    """
    # Images created or transformed in memory (convert/resize/crop, ...) carry
    # ``format = None``; saving to a nameless buffer with no format raises, so
    # fall back to PNG, which is lossless.
    image_format = image.format or "PNG"
    buffer = BytesIO()
    image.save(buffer, format=image_format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

def _form_system_prompt(filename):
    """Build the system prompt shared by every extraction backend."""
    return system_prompt_template.format(filename, FormDetails.schema_json())


def _image_media_type(image):
    """Return the MIME type for the image's own format, or PNG if it has none."""
    return f"image/{(image.format or 'PNG').lower()}"


def _openai_form_completion(model, system_content, user_content):
    """Run an OpenAI structured-output request constrained to FormDetails."""
    client = OpenAI(api_key=OPENAI_API_KEY)
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ],
        response_format=FormDetails,
        temperature=0.0,
    )
    return completion.choices[0].message.content


def _llama_parse_image(image):
    """Run LlamaParse on the image and return the parsed documents' text."""
    import tempfile

    # JPEG cannot store alpha or palette data, so normalise anything that is
    # not already plain RGB or greyscale (the original handled only RGBA).
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # A unique temporary file avoids clashes between concurrent calls and
    # does not leave a stray image in the working directory.
    fd, path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        image.save(path, format="JPEG")
        documents = parser.load_data(path)
    finally:
        os.remove(path)

    # load_data returns a list of documents; join their text instead of
    # interpolating the list's Python repr into the prompt.
    return "\n\n".join(doc.text for doc in documents)


def get_text(image, filename, model, fields="ALL"):
    """Extract form fields from an image using the selected model backend.

    Args:
        image: PIL-style image of the form.
        filename: Name of the source file, inserted into the system prompt.
        model: Backend selector. Names starting with ``"gpt"`` or
            ``"claude"`` are passed to the OpenAI / Anthropic vision APIs;
            ``"llama_llm_o"`` and ``"llama_llm_d"`` first parse the image
            with LlamaParse and then query gpt-4o-mini or deepseek-chat.
        fields: Fields to extract, inserted into the user prompt.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If ``model`` does not name a supported backend.
    """
    if model.startswith("gpt"):
        print("gpt")
        base64_image = encode_image(image)
        # Declare the real encoding of the payload rather than a fixed type.
        data_url = f"data:{_image_media_type(image)};base64,{base64_image}"
        return _openai_form_completion(
            model,
            _form_system_prompt(filename),
            [
                {"type": "text", "text": prompt.format(fields)},
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        )

    if model.startswith("claude"):
        print("claude")
        base64_image = encode_image(image)
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            system=_form_system_prompt(filename) + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                # Must match the actual bytes being sent.
                                "media_type": _image_media_type(image),
                                "data": base64_image,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt.format(fields),
                        },
                    ],
                }
            ],
            temperature=0.0,
        )
        return message.content[0].text

    if model.startswith("llama_llm"):
        print("llama_llm")
        # Reject unknown variants before paying for a parse; previously they
        # fell through and failed on an unbound local at the return.
        if model not in ("llama_llm_o", "llama_llm_d"):
            raise ValueError(f"Unsupported llama_llm variant: {model!r}")

        text = _llama_parse_image(image)
        user_content = f"{prompt.format(fields)} \n Knowledge Base {text}"

        if model == "llama_llm_o":
            return _openai_form_completion(
                "gpt-4o-mini", _form_system_prompt(filename), user_content
            )

        # model == "llama_llm_d": DeepSeek through its OpenAI-compatible API.
        print("deepseek")
        client = OpenAI(
            api_key=os.getenv('DEEPSEEK_API_KEY'),
            base_url=os.getenv('DEEPSEEK_API_URL'),
        )
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": _form_system_prompt(filename)},
                {"role": "user", "content": user_content},
            ],
            stream=False,
            response_format={'type': 'json_object'},
        )
        return completion.choices[0].message.content

    raise ValueError(f"Unsupported model: {model!r}")

def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with gpt-4o.

    The request uses structured output constrained to ``TranslateDetails``.

    Args:
        text: The text to translate.
        target_language: Language to translate into; inserted into both the
            system and the user prompt.

    Returns:
        The content of the first completion choice.
    """
    openai_client = OpenAI(api_key=OPENAI_API_KEY)

    system_message = {
        "role": "system",
        "content": translate_system_prompt.format(
            target_language, TranslateDetails.schema_json()
        ),
    }
    user_message = {
        "role": "user",
        "content": translation_prompt.format(target_language, text),
    }

    completion = openai_client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[system_message, user_message],
        response_format=TranslateDetails,
        temperature=0.0,
    )
    return completion.choices[0].message.content


def chat_text(system, query, history):
    """Answer ``query`` with gpt-4o, grounded in a knowledge base and history.

    Args:
        system: Knowledge-base text embedded in the system message.
        query: The user's question, sent as the user message.
        history: Prior conversation, embedded in the system message.

    Returns:
        The message object of the first completion choice (not just its
        text content).
    """
    openai_client = OpenAI()

    system_message = {
        "role": "system",
        "content": f"Here is the text you have to answer based on this. Knowledge Base:-{system}. \n History: {history}",
    }
    user_message = {"role": "user", "content": query}

    result = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )
    first_choice = result.choices[0]
    return first_choice.message