Spaces:
Sleeping
Sleeping
File size: 6,717 Bytes
fc9f4fe 51ef325 fc9f4fe 51ef325 5535935 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
from dotenv import load_dotenv
import os
# import openai
from openai import OpenAI
from models import FormDetails, TranslateDetails
from prompts import system_prompt_template, prompt, translate_system_prompt, translation_prompt
import base64
from io import BytesIO
import anthropic
import nest_asyncio
from llama_parse import LlamaParse
# Patch asyncio so nested event loops are permitted
# (presumably required by LlamaParse's synchronous API - confirm).
nest_asyncio.apply()

# Load variables from a .env file into the environment before any key is read.
load_dotenv()

# Shared document parser; "markdown" and "text" are the available result types.
parser = LlamaParse(result_type="markdown")

# OpenAI credential taken from the environment (None when the variable is unset).
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
def encode_image(image):
    """Return ``image`` serialised to bytes and encoded as a base64 string.

    Args:
        image: Object exposing a ``format`` attribute and a
            ``save(fp, format=...)`` method (presumably a PIL image -
            confirm against callers).

    Returns:
        str: Base64 text (UTF-8 decoded) of the serialised image bytes.
    """
    buffer = BytesIO()
    # ``format`` is None for images that were not loaded from a file (for
    # example ones built or transformed in memory). Saving to a nameless
    # buffer then has no way to pick an encoder, so fall back to PNG.
    image_format = image.format or "PNG"
    image.save(buffer, format=image_format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def get_text(image, filename, model, fields="ALL"):
    """Extract form fields from ``image`` using the backend chosen by ``model``.

    Backends, selected from the model name:

    * ``gpt*`` - OpenAI vision request with ``FormDetails`` structured output.
    * ``claude*`` - Anthropic vision request; the JSON shape is requested in
      the system prompt.
    * ``llama_llm_o`` / ``llama_llm_d`` - the image is saved as ``image.jpg``
      and parsed by the module-level ``parser``; the parsed text is then sent
      to ``gpt-4o-mini`` or ``deepseek-chat`` respectively.

    Args:
        image: Image object exposing ``format``, ``mode``, ``convert`` and
            ``save`` (presumably a PIL image - confirm against callers).
        filename: Value substituted into the system prompt template.
        model: Model / backend name, see above.
        fields: Value substituted into the user prompt template.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If ``model`` does not match any supported backend.
    """
    is_gpt = model.startswith("gpt")
    is_claude = model.startswith("claude")
    is_llama = model in ("llama_llm_o", "llama_llm_d")
    if not (is_gpt or is_claude or is_llama):
        # Without this guard the function reached its final ``return`` with no
        # result bound and failed with an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported model: {model!r}; expected a name starting with "
            "'gpt' or 'claude', or 'llama_llm_o' / 'llama_llm_d'"
        )

    system_prompt = system_prompt_template.format(filename, FormDetails.schema_json())
    user_prompt = prompt.format(fields)

    if is_gpt or is_claude:
        # encode_image() serialises the image in its own format, so the media
        # type sent to the API has to follow that format instead of being
        # hard-coded; a mismatch can be rejected by the provider.
        image_format = image.format or "PNG"
        media_type = f"image/{image_format.lower()}"
        # Only the vision backends need the base64 payload.
        base64_image = encode_image(image)

    if is_gpt:
        print("gpt")
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{media_type};base64,{base64_image}"},
                        },
                    ],
                },
            ],
            response_format=FormDetails,
            temperature=0.0,
        )
        return completion.choices[0].message.content

    if is_claude:
        print("claude")
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=model,
            max_tokens=1024,
            system=system_prompt + " In following Json format,class FormDetails(BaseModel):\nfields: List[str]\nvalues: List[str] ",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": base64_image,
                            },
                        },
                        {"type": "text", "text": user_prompt},
                    ],
                }
            ],
            temperature=0.0,
        )
        return message.content[0].text

    # LlamaParse pipeline: parse the image to text, then query a text-only LLM.
    print("llama_llm")
    # JPEG cannot store an alpha channel or a palette, so convert those modes
    # before writing the temporary file.
    if image.mode in ("RGBA", "LA", "P"):
        image = image.convert("RGB")
    image.save("image.jpg")
    text = parser.load_data("image.jpg")
    knowledge_prompt = f"{user_prompt} \n Knowledge Base {text}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": knowledge_prompt},
    ]

    if model == "llama_llm_o":
        client = OpenAI(api_key=OPENAI_API_KEY)
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=messages,
            response_format=FormDetails,
            temperature=0.0,
        )
        return completion.choices[0].message.content

    # Remaining case: model == "llama_llm_d" (DeepSeek, OpenAI-compatible API).
    print("deepseek")
    client = OpenAI(
        api_key=os.getenv('DEEPSEEK_API_KEY'),
        base_url=os.getenv('DEEPSEEK_API_URL'),
    )
    completion = client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        stream=False,
        response_format={'type': 'json_object'},
    )
    return completion.choices[0].message.content
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with OpenAI ``gpt-4o``.

    The request uses ``TranslateDetails`` as the structured response format
    and a temperature of 0.0.

    Args:
        text: Text to translate; substituted into the translation prompt.
        target_language: Language substituted into both prompts.

    Returns:
        The content of the first choice's message (presumably a JSON string
        following the ``TranslateDetails`` schema - confirm against callers).
    """
    openai_client = OpenAI(api_key=OPENAI_API_KEY)

    system_message = {
        "role": "system",
        "content": translate_system_prompt.format(
            target_language, TranslateDetails.schema_json()
        ),
    }
    user_message = {
        "role": "user",
        "content": translation_prompt.format(target_language, text),
    }

    completion = openai_client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[system_message, user_message],
        response_format=TranslateDetails,
        temperature=0.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def chat_text(system, query, history):
    """Answer ``query`` with OpenAI ``gpt-4o`` using a knowledge base and history.

    Args:
        system: Knowledge-base text embedded in the system message.
        query: The user's question, sent as the user message.
        history: Prior conversation, embedded in the system message.

    Returns:
        The message object of the first completion choice (not just its
        text content).
    """
    openai_client = OpenAI()

    system_message = {
        "role": "system",
        "content": f"Here is the text you have to answer based on this. Knowledge Base:-{system}. \n History: {history}",
    }
    user_message = {"role": "user", "content": query}

    result = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )
    first_choice = result.choices[0]
    return first_choice.message