# Source: Hugging Face Spaces app (Space status at capture time: Sleeping).
import base64
import mimetypes
import os

import gradio as gr
import openai
# Configurable variables

# OpenAI API key, read from the API_KEY environment variable (None when unset).
API_KEY = os.getenv("API_KEY")

# Instructions sent as the system message with every request. The adjacent
# string literals are concatenated into one prompt; the fenced section shows
# the model the exact two-line output format it must follow.
SYSTEM_PROMPT = (
    'You will be given an image of a page out of a bank statement. '
    'You must extract ONLY TWO numbers: Total Deposits, and Total Withdrawals.\n'
    'They may be called something else, so you must check. ONLY OUTPUT in the following format:\n'
    '```\nTotal Deposits: {figure found}\nTotal Withdrawals: {figure found}\n```\n'
    'Do not output anything else. Say "N/A" if the figure is not found.'
)
# Function to process the image and get GPT-4o response
def process_image(image):
    """Extract total deposits and withdrawals from a bank-statement image.

    Reads the file at *image*, embeds it in a base64 data URL and sends it,
    together with ``SYSTEM_PROMPT``, to the ``gpt-4o`` chat-completions
    endpoint.

    Args:
        image: Filesystem path of the uploaded image. ``None`` or an empty
            string means nothing was uploaded.

    Returns:
        The text content of the model's first choice, or a short hint asking
        for an upload when no image was supplied.

    Raises:
        OSError: If the image file cannot be opened or read.
        Any error raised by the OpenAI client propagates unchanged.
    """
    # A submission without an upload arrives here as None; answer with a hint
    # instead of failing inside open() with a TypeError.
    if not image:
        return "Please upload an image of a bank statement page."

    # Encode the image to base64
    with open(image, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Label the data URL with the file's actual media type (PNG, WebP, ...)
    # rather than always claiming JPEG; fall back to JPEG when the extension
    # is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    data_url = f"data:{mime_type};base64,{base64_image}"

    # Set up OpenAI API client
    client = openai.OpenAI(api_key=API_KEY)

    # Prepare the message payload: the extraction instructions as the system
    # message, the statement page as the only user content.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": data_url}}],
        },
    ]

    # Call the OpenAI API
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )

    # Extract and return the response content
    return response.choices[0].message.content
# Create Gradio interface: a single image upload (delivered to process_image
# as a file path) in, the returned text shown in a textbox out.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=gr.Textbox(label="Response"),
    title="Bank Statement Analysis",
    description="Upload an image to receive the figures",
)
# Launch the interface.
# NOTE(review): the `if __name__ == "__main__":` guard is commented out in the
# original, so the server also starts whenever this module is imported.
# Confirm that this is intended before restoring the guard.
iface.launch(share=True)