"""Gradio app that asks GPT-4o for the totals on a bank-statement page.

The user uploads an image of one statement page; the image is sent to the
OpenAI chat-completions API together with ``SYSTEM_PROMPT`` and the model's
textual reply is shown in a textbox.
"""

import base64
import mimetypes
import os

import gradio as gr
import openai

# Configurable variables
API_KEY = os.getenv("API_KEY")
MODEL_NAME = "gpt-4o"
# MIME type used when the upload's type cannot be guessed from its file name.
# This is the value the data URL was previously hard-coded to.
DEFAULT_IMAGE_MIME_TYPE = "image/jpeg"
SYSTEM_PROMPT = (
    'You will be given an image of a page out of a bank statement. '
    'You must extract ONLY TWO numbers: Total Deposits, and Total Withdrawals.\n'
    'They may be called something else, so you must check. ONLY OUTPUT in the following format:\n'
    '```\nTotal Deposits: {figure found}\nTotal Withdrawals: {figure found}\n```\n'
    'Do not output anything else. Say "N/A" if the figure is not found.'
)


def _image_to_data_url(image_path: str) -> str:
    """Return the file at *image_path* encoded as a base64 ``data:`` URL.

    The MIME type is guessed from the file extension so that PNG/WebP/GIF
    uploads are labelled correctly; anything unrecognised (or not an image
    type) falls back to ``DEFAULT_IMAGE_MIME_TYPE``.
    """
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = DEFAULT_IMAGE_MIME_TYPE

    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:{mime_type};base64,{encoded}"


def process_image(image):
    """Send the uploaded image to GPT-4o and return the model's reply text.

    Args:
        image: Filesystem path of the uploaded image (the Gradio input is
            configured with ``type="filepath"``). ``None`` or an empty string
            means nothing was uploaded.

    Returns:
        The content of the first choice in the chat-completion response.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of a failure from ``open(None)``.
    """
    if not image:
        raise gr.Error("Please upload an image first.")

    image_url = _image_to_data_url(image)

    # The client is created per call so a missing/invalid key surfaces when a
    # request is made rather than when the module is loaded.
    client = openai.OpenAI(api_key=API_KEY)

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": image_url}}],
        },
    ]

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

    return response.choices[0].message.content


# Create Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=gr.Textbox(label="Response"),
    title="Bank Statement Analysis",
    description="Upload an image to receive the figures",
)

# Launch the interface.
# NOTE(review): the `if __name__ == "__main__":` guard appears to have been
# commented out, so the app is launched whenever this module is loaded, not
# only when it is run as a script - confirm this is intended.
# NOTE(review): share=True requests a public share link from Gradio; since the
# uploads are bank statements, confirm that public exposure is acceptable.
iface.launch(share=True)