File size: 1,737 Bytes
adbaf40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
572e95f
adbaf40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import base64
import mimetypes
import os

import gradio as gr
import openai

# Configuration
# OpenAI API key, read from the API_KEY environment variable (None when unset).
API_KEY = os.environ.get("API_KEY")

# Instruction sent as the system message with every request. The model is told
# to reply with exactly two labelled figures inside a fenced block.
SYSTEM_PROMPT = "\n".join([
    "You will be given an image of a page out of a bank statement. "
    "You must extract ONLY TWO numbers: Total Deposits, and Total Withdrawals.",
    "They may be called something else, so you must check. "
    "ONLY OUTPUT in the following format:",
    "```",
    "Total Deposits: {figure found}",
    "Total Withdrawals: {figure found}",
    "```",
    'Do not output anything else. Say "N/A" if the figure is not found.',
])

def _image_to_data_url(path):
    """Return the file at *path* encoded as a base64 ``data:`` URL.

    The MIME type is guessed from the file extension. When the extension is
    unknown, or maps to a non-image type, ``image/jpeg`` is used.
    """
    mime_type, _ = mimetypes.guess_type(path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    with open(path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:{mime_type};base64,{encoded}"


def process_image(image):
    """Send a bank statement image to GPT-4o and return its reply.

    Args:
        image: Filesystem path of the image to analyse. ``None`` or an empty
            value is treated as "nothing uploaded".

    Returns:
        The text content of the first completion choice, or a short notice
        when no image was supplied.

    Raises:
        OSError: If the image file cannot be opened or read.
        Errors raised by the OpenAI client (authentication, network, API
        failures) are not caught here.
    """
    # Bail out before touching the file system or the API: without this an
    # empty submission reaches open() and fails with an opaque TypeError.
    if not image:
        return "No image provided. Please upload an image of a bank statement page."

    data_url = _image_to_data_url(image)

    # Set up OpenAI API client
    client = openai.OpenAI(api_key=API_KEY)

    # System message carries the extraction instructions; the user message
    # carries only the image itself.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": data_url}}],
        },
    ]

    # Call the OpenAI API
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )

    # Extract and return the response content
    return response.choices[0].message.content

# Build the Gradio UI: a single image upload wired to process_image, with the
# reply shown in a text box.
upload_box = gr.Image(type="filepath", label="Upload Image")
result_box = gr.Textbox(label="Response")

iface = gr.Interface(
    fn=process_image,
    inputs=upload_box,
    outputs=result_box,
    title="Bank Statement Analysis",
    description="Upload an image to receive the figures",
)

# Launched unconditionally (there is no `if __name__ == "__main__":` guard), so
# importing this module also starts the server. share=True asks Gradio for a
# public share link.
iface.launch(share=True)