File size: 11,769 Bytes
e909869
 
 
 
26374a1
 
e909869
 
 
 
 
 
 
 
 
079d0c3
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb872b2
5e5394f
fb872b2
e909869
 
 
 
 
 
 
 
 
fb872b2
5e5394f
fb872b2
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
fb872b2
 
5e5394f
e909869
 
 
 
 
 
 
 
 
 
 
 
 
fb872b2
5e5394f
fb872b2
 
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb872b2
5e5394f
fb872b2
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb872b2
5e5394f
fb872b2
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adb1f5f
 
 
 
 
 
 
e909869
 
 
adb1f5f
 
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d7d569
ee98f4c
 
e909869
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee98f4c
e909869
3a811ca
079d0c3
5e5394f
ce70d88
079d0c3
 
 
46fea98
079d0c3
9a3ff64
e909869
b1e21fb
7562209
 
b1e21fb
 
 
 
 
 
7562209
 
 
b1e21fb
7562209
e909869
 
 
7562209
079d0c3
 
7562209
 
 
e909869
 
 
 
 
 
 
7562209
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import ast
import base64
import json
import mimetypes
import os
import tempfile

import gradio as gr

from typing_extensions import TypedDict

from openai import OpenAI
from langchain_openai import ChatOpenAI

from langgraph.graph import StateGraph, START, END
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.document_loaders import PyPDFLoader

from markdown_pdf import MarkdownPdf, Section
from dotenv import load_dotenv

# Populate the process environment via python-dotenv before the API settings
# below are read from os.environ.
load_dotenv()

# Raw OpenAI SDK client; text_extractor uses it for the image request.
# os.environ[...] raises KeyError at import time if either variable is missing.
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"]
)

# LangChain chat model shared by the categorizer, verifier and formatter nodes.
# It reads the same two environment variables as `client` above.
llm = ChatOpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"],
    model="gpt-4o-mini",
    temperature=0
)

# Hard-coded claim context: `employee_name` is inserted into the final report by
# formatter; `type_of_expense` is the keyword verifier searches for in the policy.
employee_name = 'John Doe'
type_of_expense = 'Restaurant'

# The policy PDF is loaded once at import time; verifier iterates over the
# resulting documents and reads each one's `page_content`.
company_policy_file_path = 'Company Policy on Expense Claims.pdf'
loader = PyPDFLoader(company_policy_file_path)
company_policy_document = loader.load()


class State(TypedDict):
    """Shared state handed from node to node in the expense-claim workflow.

    The graph is invoked with only ``image_path`` set; every node returns a
    partial dict that fills in the key(s) it is responsible for.
    """
    # Filesystem path of the uploaded bill image (input to the graph).
    image_path: str
    # Bill details transcribed from the image by text_extractor.
    extracted_text: str
    # Model reply from categorizer: the bill items with a category each.
    categorized_text: str
    # Policy text that verifier checked the items against.
    relevant_company_policy: str
    # Model reply from verifier: the items left after the policy check.
    verified_text: str
    # Written by estimator as a dict with the keys 'taxes_and_charges' and
    # 'total_amount' (formatter indexes it by those keys), hence dict, not str.
    revised_calculation: dict
    # Markdown report produced by formatter.
    final_output: str

def generate_data_uri(jpg_file_path):
    """Return the image file at ``jpg_file_path`` as a base64 ``data:`` URI.

    The MIME type in the URI is guessed from the file extension, so a ``.jpg``
    upload is labelled ``image/jpeg`` instead of always being labelled PNG.
    When the extension is unknown or does not map to an image type, the
    previous hard-coded value ``image/png`` is used.

    Args:
        jpg_file_path: Path of the image file to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    mime_type, _ = mimetypes.guess_type(jpg_file_path)
    if mime_type is None or not mime_type.startswith('image/'):
        # Unknown or non-image extension: keep the historical default.
        mime_type = 'image/png'

    with open(jpg_file_path, 'rb') as image_file:
        base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    return f"data:{mime_type};base64,{base64_encoded_data}"

def text_extractor(state: State):
    """
    Workflow node: send the bill image at ``state['image_path']`` to the
    ``gpt-4o-mini`` chat-completions endpoint and return the details it reads.

    Returns a partial state update ``{'extracted_text': <model reply>}``.
    """

    system_instructions = """
        You are an expert in extracting the text in images.
        Extract the following details from the bill presented in the input.
        - Date of bill
        - Bill No
        - Restaurant Name and Address
        - Items ordered quantity and price
        - Tax and Charges
        - Total amount

        Do not output anything except the above details in your output.
        """

    # The image is encoded before any status message is shown, so a missing
    # or unreadable file fails here first.
    bill_image_uri = generate_data_uri(state['image_path'])

    system_turn = {'role': 'system', 'content': system_instructions}
    user_turn = {
        'role': 'user',
        'content': [
            {'type': "image_url", "image_url": {'url': bill_image_uri}}
        ],
    }

    print("I have access to the Open AI API for text extraction.")
    gr.Info("I have access to the Open AI API for text extraction. I need to format the input image in the Open AI format", duration=2)
    print("I need to format the input image in the Open AI format")

    completion = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[system_turn, user_turn],
        temperature=0,
    )
    bill_details = completion.choices[0].message.content

    print("Extracted text from the input image")
    gr.Info("Extracted text from the input image", duration=2)

    return {'extracted_text': bill_details}

def categorizer(state: State):
    """
    Workflow node: ask the shared chat model to label every item in
    ``state['extracted_text']`` as Alcoholic Drinks, Non-Alcoholic Drinks or Food.

    Returns a partial state update ``{'categorized_text': <model reply>}``.
    """

    system_instructions = """
        You are an expert accountant tasked to categorize the items ordered in the bill.
        Categorize the items STRICTLY into the following categories: Alcoholic Drinks, Non-Alcoholic Drinks and Food.
        Remember to categorize the items into one of the three categories only. Do not use new categories. 
        Present your output as a JSON with the following fields: 
        [{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]        
        
        Do not output anything except the above fields in your JSON output.
        Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
        """

    status = "Categorizing items in the input text to one of: Alcoholic Drinks, Non-Alcoholic Drinks and Food."
    print(status)
    gr.Info(status, duration=1)

    reply = llm.invoke([
        SystemMessage(content=system_instructions),
        HumanMessage(content=state['extracted_text']),
    ])

    return {'categorized_text': reply.content}


def verifier(state: State):
    """
    Workflow node: pick the company-policy text relevant to ``type_of_expense``
    and ask the chat model to drop bill items that the policy does not allow.

    The policy text is the content of the last policy page that mentions
    ``type_of_expense``. If no page mentions it, the whole policy is used
    instead, so the node no longer fails with ``UnboundLocalError``.

    Returns a partial state update with ``verified_text`` (the model reply)
    and ``relevant_company_policy`` (the policy text that was sent).
    """

    print("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable.")
    gr.Info("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable. I will use the search tool to execute this step", duration=2)
    print("I will use the search tool to execute this step.")

    relevant_company_policy = None
    for document in company_policy_document:
        if type_of_expense in document.page_content:
            # Later matches overwrite earlier ones (unchanged behaviour).
            relevant_company_policy = document.page_content

    if relevant_company_policy is None:
        # No page names this expense type: fall back to the full policy so
        # the model still has something to verify against.
        print(f"No policy page mentions '{type_of_expense}'; using the full company policy.")
        relevant_company_policy = "\n\n".join(
            document.page_content for document in company_policy_document
        )

    verification_system_message = """
        You are an expert accountant tasked to verify the bill details against the provided company policy.
        Verify the items in the submitted bill against the company policy presented below.

        Present your output in the following JSON format after removing the items inthat are not aligned with the company policy.
        [{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]        
        
        Do not output anything except the above details in your JSON output.
        Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
        """

    verification_prompt = [
        SystemMessage(content=verification_system_message + f"\n Company Policy: \n{relevant_company_policy}"),
        HumanMessage(content=state['categorized_text'])
    ]

    verified_text = llm.invoke(verification_prompt)

    return {'verified_text': verified_text.content, 'relevant_company_policy': relevant_company_policy}


def _parse_verified_items(raw_text):
    """Parse the verifier's reply into Python objects, tolerating common LLM deviations."""
    cleaned = raw_text.strip()

    # Remove a surrounding ``` / ```json fence if the model added one anyway.
    if cleaned.startswith("```"):
        first_newline = cleaned.find("\n")
        cleaned = cleaned[first_newline + 1:] if first_newline != -1 else cleaned[3:]
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as json_error:
        # The prompts show the expected format with single quotes, which is a
        # valid Python literal but not valid JSON. literal_eval only accepts
        # literals, so it is safe to use on model output.
        try:
            return ast.literal_eval(cleaned)
        except (ValueError, SyntaxError):
            # Surface the original JSON error, as before.
            raise json_error


def estimator(state: State):
    """
    Workflow node: recompute the reimbursable amount from the verified items.

    ``state['verified_text']`` is parsed into a list of items; each item
    contributes ``quantity * price`` to the bill total. Taxes and charges are
    then added as four fixed fractions of that total (0.10, 0.025, 0.025 and
    0.20, i.e. 35% overall).
    NOTE(review): the file does not say what each rate stands for - confirm.

    Returns a partial state update ``{'revised_calculation': {...}}`` whose
    dict has the keys ``taxes_and_charges`` and ``total_amount``.

    Raises:
        json.JSONDecodeError: If the verified text cannot be parsed.
        KeyError / ValueError: If an item lacks ``quantity``/``price`` or
            they are not numeric.
    """

    print("Calculating the revised total amount and taxes")
    gr.Info("Calculating the revised total amount and taxes", duration=1)

    total_bill = 0
    for item in _parse_verified_items(state['verified_text']):
        total_bill += float(item['quantity']) * float(item['price'])

    total_taxes_and_charges = total_bill * 0.10 + total_bill * 0.025 + total_bill * 0.025 + total_bill * 0.20

    revised_calculation = {
        'taxes_and_charges': total_taxes_and_charges,
        'total_amount': total_bill + total_taxes_and_charges
    }

    return {'revised_calculation': revised_calculation}


def formatter(state: State):
    """
    Workflow node: have the chat model write the final expense-claim report.

    The request combines the module-level ``employee_name``, the original bill
    text, the verified items and the recalculated amounts from the state.

    Returns a partial state update ``{'final_output': <markdown report>}``.
    """
    status = "Formatting the output into a markdown file"
    print(status)
    gr.Info(status, duration=1)

    report_instructions = """ 
        You are an expert accountant tasked to generate the expense claim report.
        Generate the expense claim report based on the calculated total amount to be reimbursed and other details available to you.
        
        The details of the fields needed for the report are present in the input.
        
        These are:
        - Employee Name:
        - Original Bill:
        - Verified items ordered quantity and price:
        - Total amount to be reimbursed:
        - Tax and Charges: 
        
        Use only the details from the input to generate the report.

        Present your output in the following markdown format.
        # Expense Claim Report
        
        ## Employee Name: <Insert Employee Name>
        ## Date: <Insert Date from original bill>
        ## Bill No: <Insert Bill No from original bill>
        ## Restaurant Name and Address: <Insert Restaurant Name and Address fromm original bill>
        ## Items ordered quantity and price (<arrange in a table format from verified list of items>):
            |Item|Quantity|Price|
                ...
            ... 
        ### Tax and Charges: <enter the tax amount from calculated amounts>
        ### Total amount to be reimbursed: <enter the total from calculated amounts>

        Do not output anything except the above details in your output.
        Do not delimit the output with any extra tags (e.g., ```).
        """

    # Named `report_details` so the builtin `input` stays usable in this scope.
    report_details = f"""
    Employee Name: {employee_name}

    ---

    Original Bill:

    {state['extracted_text']}

    ---

    Verified items ordered quantity and price:
    {state['verified_text']}

    ---
    Calculated amounts:
    Taxes and Charges: {state['revised_calculation']['taxes_and_charges']}
    Total amount to be reimbursed: {state['revised_calculation']['total_amount']}

    """

    reply = llm.invoke([
        SystemMessage(content=report_instructions),
        HumanMessage(content=report_details),
    ])

    return {'final_output': reply.content}

    
def claim_generator(input_bill_path, progress=gr.Progress()):
    """
    Gradio handler: run the full expense-claim workflow for one uploaded bill.

    Builds a linear graph (text_extractor -> categorizer -> verifier ->
    estimator -> formatter), invokes it with the bill path, and renders the
    resulting markdown report to a PDF.

    The PDF is written into a fresh temporary directory for each call rather
    than to a fixed file in the working directory. The interface allows
    several requests to run at once, and a shared output path would let them
    overwrite each other's document. The file name stays ``expense-claim.pdf``.

    Args:
        input_bill_path: Filesystem path of the uploaded bill image.
        progress: Gradio progress tracker, called to report progress.

    Returns:
        A tuple ``(markdown_report, pdf_path)``.
    """

    progress(0, desc="Starting workflow")

    workflow = StateGraph(State)

    # Nodes in execution order; the edges below chain them START -> ... -> END.
    stages = [
        ("text_extractor", text_extractor),
        ("categorizer", categorizer),
        ("verifier", verifier),
        ("estimator", estimator),
        ("formatter", formatter),
    ]
    for stage_name, stage_fn in stages:
        workflow.add_node(stage_name, stage_fn)

    stage_order = [stage_name for stage_name, _ in stages]
    for upstream, downstream in zip([START, *stage_order], [*stage_order, END]):
        workflow.add_edge(upstream, downstream)

    chain = workflow.compile()
    progress(0.05)
    output = chain.invoke({'image_path': input_bill_path})
    progress(0.25)

    gr.Info("Converting the markdown file to pdf", duration=2)

    pdf = MarkdownPdf(toc_level=3)
    pdf.add_section(Section(output['final_output']))

    # One directory per request keeps concurrent runs from clobbering each other.
    output_dir = tempfile.mkdtemp(prefix="expense-claim-")
    pdf_path = os.path.join(output_dir, "expense-claim.pdf")
    pdf.save(pdf_path)

    return output['final_output'], pdf_path

# Text (with an embedded workflow diagram image) passed to the interface
# as its `article`.
agentic_workflow_representation = 'The agentic workflow used to generate an expense claim document is represented below: \n <img src="https://cdn-uploads.huggingface.co/production/uploads/64118e60756b9e455c7eddd6/iqdGjUG7POKJXLItzWb-K.png">'
# User-facing summary of the five workflow steps, passed to the interface
# as its `description`.
agentic_workflow_description = """
This demo represents a multi-agent collaborative workflow that generates an expense claim document based on a submitted bill.
Once a user uploads a bill to the interface, the following steps are executed: 
1. Extract text from an image of the bill.
2. Categorize items in the bill to: alcoholic drinks, non-alcoholic drinks and food.
3. Based on the categories, retrieve relevant sections from the company reimbursement policy; remove items that are non reimbursable.
4. Compute the total amount that can be reimbursed (along with taxes).
5. Prepare a claim document in the company format (as a markdown document).

Each step in this workflow are executed by function-calling agents. 
For example, the text extractor is an API-calling agent that calls uses the Open AI APIs to extract text from the bill.
Similarly, the verifier is a search agent that extracts relevant portions of the company policy based on the nature of the bill. 
"""

# Web UI: one image input (handed to claim_generator as a file path) and two
# outputs matching claim_generator's return tuple - the markdown report and
# the generated PDF file.
# NOTE(review): examples='images' presumably names a directory of sample
# bills next to this file - confirm it exists in the deployment.
demo = gr.Interface(
    fn=claim_generator,
    inputs=gr.Image(type="filepath", label="Upload an image of the bill"),
    outputs=[gr.Markdown(label="Expense Claim Document", show_copy_button=True, container=True),
             gr.File(label="Download your claim document", show_label=True)],
    title="Expense Claim Document Generator",
    description=agentic_workflow_description,
    article=agentic_workflow_representation,
    examples='images',
    cache_examples=False,
    theme=gr.themes.Base(),
    concurrency_limit=16
)

demo.queue()
# The login password is read from the PASSWD environment variable;
# os.getenv returns None when it is unset.
demo.launch(auth=("johndoe", os.getenv('PASSWD')), ssr_mode=False)