# NOTE: scraped from a Hugging Face Space file view (commit fc9f4fe, 5,426 bytes);
# page chrome (Space status, line-number gutter) removed so the module parses.
from dotenv import load_dotenv
import os
# import openai
from openai import OpenAI
from models import FormDetails
from prompts import system_prompt_template, prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse
nest_asyncio.apply()
load_dotenv()
# set up parser
parser = LlamaParse(
result_type="markdown" # "markdown" and "text" are available
)
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# Function to encode the image
def encode_image(image):
buffer = BytesIO()
# Save the image to the buffer in its format (e.g., PNG, JPEG)
image.save(buffer, format=image.format)
# Get the byte data
image_bytes = buffer.getvalue()
return base64.b64encode(image_bytes).decode("utf-8")
def get_text(image, filename, model, fields="ALL"):
    """Extract form field values from *image* using the selected model backend.

    Args:
        image: PIL image of the form to read.
        filename: Source file name, interpolated into the system prompt.
        model: Backend selector — names starting with "gpt" use OpenAI
            structured parsing, "claude" uses the Anthropic vision API,
            "llama_llm_o" OCRs with LlamaParse then parses with gpt-4o-mini,
            and "llama_llm_d" OCRs with LlamaParse then parses with DeepSeek.
        fields: Which fields to extract; "ALL" by default.

    Returns:
        str: The model's response text (JSON-shaped field/value data).

    Raises:
        ValueError: If *model* matches no supported backend. (Previously an
            unknown model fell through to an undefined ``response`` and
            raised a confusing ``NameError``.)
    """
    # Base64 payload and its declared MIME type are derived from the same
    # source image so they always agree (the old code hardcoded "image/jpeg"
    # for OpenAI and "image/png" for Anthropic regardless of actual format).
    base64_image = encode_image(image)
    media_type = f"image/{(image.format or 'png').lower()}"
    system_prompt = system_prompt_template.format(filename, FormDetails.schema_json())

    if model.startswith("gpt"):
        print("gpt")
        return _extract_with_gpt(model, system_prompt, base64_image, media_type, fields)
    elif model.startswith("claude"):
        print("claude")
        return _extract_with_claude(model, system_prompt, base64_image, media_type, fields)
    elif model.startswith("llama_llm"):
        print("llama_llm")
        return _extract_with_llama(image, model, system_prompt, fields)
    raise ValueError(f"Unsupported model: {model!r}")


def _extract_with_gpt(model, system_prompt, base64_image, media_type, fields):
    """Structured extraction via the OpenAI chat-completions parse API."""
    client = OpenAI(api_key=OPENAI_API_KEY)
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt.format(fields)},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{media_type};base64,{base64_image}"},
                    },
                ],
            },
        ],
        response_format=FormDetails,
        temperature=0.0,
    )
    return completion.choices[0].message.content


def _extract_with_claude(model, system_prompt, base64_image, media_type, fields):
    """Extraction via the Anthropic messages API (vision input).

    Anthropic has no structured-output parse helper, so the expected JSON
    shape is appended to the system prompt instead.
    """
    client = anthropic.Anthropic()
    message = client.messages.create(
        model=model,
        max_tokens=1024,
        system=system_prompt
        + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": base64_image,
                        },
                    },
                    {"type": "text", "text": prompt.format(fields)},
                ],
            },
        ],
        temperature=0.0,
    )
    return message.content[0].text


def _extract_with_llama(image, model, system_prompt, fields):
    """OCR the image with LlamaParse, then extract fields with a text-only LLM."""
    # JPEG has no alpha channel, so flatten RGBA before saving.
    if image.mode == "RGBA":
        image = image.convert("RGB")
    # NOTE(review): writes to a fixed path in the working directory — not safe
    # for concurrent requests; consider tempfile if that becomes a concern.
    image.save("image.jpg")
    text = parser.load_data("image.jpg")
    user_content = f"{prompt.format(fields)} \n Knowledge Base {text}"

    if model == "llama_llm_o":
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        return completion.choices[0].message.content

    if model == "llama_llm_d":
        print("deepseek")
        # DeepSeek exposes an OpenAI-compatible endpoint; only JSON-object
        # mode is available (no schema-bound structured parsing).
        client = OpenAI(
            api_key=os.getenv('DEEPSEEK_API_KEY'),
            base_url=os.getenv('DEEPSEEK_API_URL'),
        )
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            stream=False,
            response_format={'type': 'json_object'},
        )
        return completion.choices[0].message.content

    raise ValueError(f"Unsupported llama_llm variant: {model!r}")