# Captured from a Hugging Face Spaces page (Space status at capture time: Sleeping).
import ast
import base64
import json
import mimetypes
import os
import re
import tempfile

import gradio as gr
from typing_extensions import TypedDict
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, START, END
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.document_loaders import PyPDFLoader
from markdown_pdf import MarkdownPdf, Section
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before the credentials are read below.
load_dotenv()

# Both clients use the same credentials and endpoint; a missing variable
# raises KeyError at import time.
_openai_connection = {
    "api_key": os.environ["OPENAI_API_KEY"],
    "base_url": os.environ["OPENAI_BASE_URL"],
}

# Raw OpenAI client: used for the image (vision) request in text_extractor.
client = OpenAI(**_openai_connection)

# LangChain chat model: used by the text-only nodes of the workflow.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, **_openai_connection)

# Fixed demo inputs: the claimant's name and the keyword used to pick policy pages.
employee_name = 'John Doe'
type_of_expense = 'Restaurant'

# The company policy PDF is read once at import time and kept in
# `company_policy_document` for the verifier to search.
company_policy_file_path = 'Company Policy on Expense Claims.pdf'
loader = PyPDFLoader(company_policy_file_path)
company_policy_document = loader.load()
class State(TypedDict):
    """Shared state passed between the nodes of the expense-claim graph.

    Each node reads the keys it needs and returns a partial update containing
    the key(s) it produces.
    """

    # Path of the uploaded bill image (the only key supplied when the graph is invoked).
    image_path: str
    # Bill details transcribed from the image by text_extractor.
    extracted_text: str
    # JSON text of categorized line items produced by categorizer.
    categorized_text: str
    # Company policy text selected by verifier.
    relevant_company_policy: str
    # JSON text of the line items that remain after verification.
    verified_text: str
    # Output of estimator: {'taxes_and_charges': ..., 'total_amount': ...}.
    # It is a dict of numbers, not a string; formatter indexes it by key.
    revised_calculation: dict[str, float]
    # Markdown expense claim report produced by formatter.
    final_output: str
def generate_data_uri(jpg_file_path):
    """Return the image file at ``jpg_file_path`` as a base64 ``data:`` URI.

    The media type is guessed from the file extension so that a JPEG is
    labelled ``image/jpeg`` rather than always ``image/png``. When the
    extension is unknown or is not an image type, ``image/png`` is used,
    which is what this function previously emitted for every file.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    media_type, _ = mimetypes.guess_type(jpg_file_path)
    if media_type is None or not media_type.startswith("image/"):
        media_type = "image/png"

    with open(jpg_file_path, 'rb') as image_file:
        base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    return f"data:{media_type};base64,{base64_encoded_data}"
def text_extractor(state: State):
    """Graph node: transcribe the bill image into text.

    Sends the image at ``state['image_path']`` (inlined as a data URI) to the
    ``gpt-4o-mini`` chat-completions endpoint with a system prompt listing the
    bill fields to extract.

    Returns:
        A partial state update ``{'extracted_text': <model reply>}``.
    """
    system_prompt = """
You are an expert in extracting the text in images.
Extract the following details from the bill presented in the input.
- Date of bill
- Bill No
- Restaurant Name and Address
- Items ordered quantity and price
- Tax and Charges
- Total amount
Do not output anything except the above details in your output.
"""

    # The user turn carries only the image; the instructions are in the system turn.
    bill_image_part = {
        'type': "image_url",
        "image_url": {'url': generate_data_uri(state['image_path'])},
    }
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': [bill_image_part]},
    ]

    print("I have access to the Open AI API for text extraction.")
    gr.Info("I have access to the Open AI API for text extraction. I need to format the input image in the Open AI format", duration=2)
    print("I need to format the input image in the Open AI format")

    completion = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=messages,
        temperature=0,
    )
    bill_text = completion.choices[0].message.content

    print("Extracted text from the input image")
    gr.Info("Extracted text from the input image", duration=2)

    return {'extracted_text': bill_text}
def categorizer(state: State):
    """Graph node: assign each bill item to one of three categories.

    Passes ``state['extracted_text']`` to the chat model with a system prompt
    that asks for a JSON list of items labelled Alcoholic Drinks,
    Non-Alcoholic Drinks or Food.

    Returns:
        A partial state update ``{'categorized_text': <model reply text>}``.
    """
    system_prompt = """
You are an expert accountant tasked to categorize the items ordered in the bill.
Categorize the items STRICTLY into the following categories: Alcoholic Drinks, Non-Alcoholic Drinks and Food.
Remember to categorize the items into one of the three categories only. Do not use new categories.
Present your output as a JSON with the following fields:
[{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]
Do not output anything except the above fields in your JSON output.
Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
"""

    print("Categorizing items in the input text to one of: Alcoholic Drinks, Non-Alcoholic Drinks and Food.")
    gr.Info("Categorizing items in the input text to one of: Alcoholic Drinks, Non-Alcoholic Drinks and Food.", duration=1)

    reply = llm.invoke([
        SystemMessage(content=system_prompt),
        HumanMessage(content=state['extracted_text']),
    ])

    return {'categorized_text': reply.content}
def verifier(state: State):
    """Graph node: drop bill items that the company policy does not allow.

    Selects the policy pages whose text mentions ``type_of_expense`` and asks
    the chat model to return only those items from
    ``state['categorized_text']`` that comply with that policy text.

    All matching pages are used, not just the last one found. If no page
    mentions the expense type, the whole policy is supplied instead, so the
    node no longer fails with an unbound ``relevant_company_policy``.

    Returns:
        A partial state update with ``verified_text`` (the model reply) and
        ``relevant_company_policy`` (the policy text given to the model).
    """
    print("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable.")
    gr.Info("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable. I will use the search tool to execute this step", duration=2)
    print("I will use the search tool to execute this step.")

    matching_pages = [
        page.page_content
        for page in company_policy_document
        if type_of_expense in page.page_content
    ]
    if not matching_pages:
        # No page names this expense type: verify against the full policy
        # rather than crashing or verifying against nothing.
        matching_pages = [page.page_content for page in company_policy_document]
    relevant_company_policy = "\n".join(matching_pages)

    verification_system_message = """
You are an expert accountant tasked to verify the bill details against the provided company policy.
Verify the items in the submitted bill against the company policy presented below.
Present your output in the following JSON format after removing the items inthat are not aligned with the company policy.
[{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]
Do not output anything except the above details in your JSON output.
Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
"""

    verification_prompt = [
        SystemMessage(content=verification_system_message + f"\n Company Policy: \n{relevant_company_policy}"),
        HumanMessage(content=state['categorized_text'])
    ]
    verified_text = llm.invoke(verification_prompt)

    return {'verified_text': verified_text.content, 'relevant_company_policy': relevant_company_policy}
# Surcharges added on top of the reimbursable subtotal, as fractions of it.
# NOTE(review): this file does not say what each rate represents — confirm
# against the company policy before changing them.
_SURCHARGE_RATES = (0.10, 0.025, 0.025, 0.20)

# One plain number, optionally negative, with optional thousands separators.
_NUMBER_TOKEN = re.compile(r"-?\d[\d,]*(?:\.\d+)?")


def _parse_items(raw_text):
    """Parse the verified item list, tolerating code fences and single quotes."""
    text = raw_text.strip()

    # Models sometimes wrap JSON in a fenced block despite the instructions.
    fenced = re.match(r"^```[A-Za-z]*\s*(.*?)\s*```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)

    try:
        return json.loads(text)
    except json.JSONDecodeError as error:
        # The prompt's example uses single quotes, which the model may echo;
        # literal_eval reads that form without executing any code.
        try:
            return ast.literal_eval(text)
        except (ValueError, SyntaxError):
            # Report the original JSON error, as before this fallback existed.
            raise error from None


def _to_number(value):
    """Convert a quantity or price to float, ignoring currency symbols and commas."""
    try:
        return float(value)
    except ValueError:
        tokens = _NUMBER_TOKEN.findall(str(value))
        # Refuse to guess when zero or several numbers are present (e.g. "2 x 150").
        if len(tokens) != 1:
            raise
        return float(tokens[0].replace(",", ""))


def estimator(state: State):
    """Graph node: recompute the reimbursable total from the verified items.

    Sums ``quantity * price`` over the items in ``state['verified_text']``,
    then adds each rate in ``_SURCHARGE_RATES`` applied to that subtotal.

    Returns:
        A partial state update ``{'revised_calculation': {...}}`` holding
        ``taxes_and_charges`` and ``total_amount`` (subtotal plus surcharges).

    Raises:
        json.JSONDecodeError: if the verified text cannot be parsed as a list.
        ValueError: if a quantity or price is not a single recognisable number.
    """
    print("Calculating the revised total amount and taxes")
    gr.Info("Calculating the revised total amount and taxes", duration=1)

    total_bill = 0
    for item in _parse_items(state['verified_text']):
        total_bill += _to_number(item['quantity']) * _to_number(item['price'])

    # Added term by term, in the original order, so results match the
    # previous inline expression exactly.
    total_taxes_and_charges = 0
    for rate in _SURCHARGE_RATES:
        total_taxes_and_charges += total_bill * rate

    revised_calculation = {
        'taxes_and_charges': total_taxes_and_charges,
        'total_amount': total_bill + total_taxes_and_charges
    }

    return {'revised_calculation': revised_calculation}
def formatter(state: State):
    """Graph node: write the expense claim report in markdown.

    Gives the chat model the employee name, the original bill text, the
    verified items and the recalculated amounts, together with a system
    prompt that fixes the report layout.

    Returns:
        A partial state update ``{'final_output': <markdown report text>}``.
    """
    print("Formatting the output into a markdown file")
    gr.Info("Formatting the output into a markdown file", duration=1)

    report_instructions = """
You are an expert accountant tasked to generate the expense claim report.
Generate the expense claim report based on the calculated total amount to be reimbursed and other details available to you.
The details of the fields needed for the report are present in the input.
These are:
- Employee Name:
- Original Bill:
- Verified items ordered quantity and price:
- Total amount to be reimbursed:
- Tax and Charges:
Use only the details from the input to generate the report.
Present your output in the following markdown format.
# Expense Claim Report
## Employee Name: <Insert Employee Name>
## Date: <Insert Date from original bill>
## Bill No: <Insert Bill No from original bill>
## Restaurant Name and Address: <Insert Restaurant Name and Address fromm original bill>
## Items ordered quantity and price (<arrange in a table format from verified list of items>):
|Item|Quantity|Price|
...
...
### Tax and Charges: <enter the tax amount from calculated amounts>
### Total amount to be reimbursed: <enter the total from calculated amounts>
Do not output anything except the above details in your output.
Do not delimit the output with any extra tags (e.g., ```).
"""

    # Named so that it does not shadow the builtin input().
    report_facts = f"""
Employee Name: {employee_name}
---
Original Bill:
{state['extracted_text']}
---
Verified items ordered quantity and price:
{state['verified_text']}
---
Calculated amounts:
Taxes and Charges: {state['revised_calculation']['taxes_and_charges']}
Total amount to be reimbursed: {state['revised_calculation']['total_amount']}
"""

    reply = llm.invoke([
        SystemMessage(content=report_instructions),
        HumanMessage(content=report_facts),
    ])

    return {'final_output': reply.content}
def claim_generator(input_bill_path, progress=gr.Progress()):
    """Run the full workflow on one bill image and produce the claim report.

    Builds a linear graph (text_extractor -> categorizer -> verifier ->
    estimator -> formatter), invokes it with the uploaded image path, and
    renders the resulting markdown to a PDF.

    Each call writes its PDF into a fresh temporary directory. The previous
    fixed ``expense-claim.pdf`` in the working directory was shared by all
    requests, so concurrent users could overwrite or download each other's
    report. The file name is still ``expense-claim.pdf``.

    Args:
        input_bill_path: File path of the uploaded bill image.
        progress: Gradio progress tracker (supplied by Gradio).

    Returns:
        A tuple ``(markdown_report, pdf_path)``.
    """
    progress(0, desc="Starting workflow")

    steps = (
        ("text_extractor", text_extractor),
        ("categorizer", categorizer),
        ("verifier", verifier),
        ("estimator", estimator),
        ("formatter", formatter),
    )

    workflow = StateGraph(State)
    for step_name, step_fn in steps:
        workflow.add_node(step_name, step_fn)

    # Chain the nodes in order: START -> first step -> ... -> last step -> END.
    step_names = [step_name for step_name, _ in steps]
    for source, target in zip([START, *step_names], [*step_names, END]):
        workflow.add_edge(source, target)

    chain = workflow.compile()
    progress(0.05)

    output = chain.invoke({'image_path': input_bill_path})
    progress(0.25)

    gr.Info("Converting the markdown file to pdf", duration=2)
    pdf = MarkdownPdf(toc_level=3)
    pdf.add_section(Section(output['final_output']))

    # One directory per request keeps concurrent runs from clobbering each other.
    # NOTE(review): these directories are not removed here — confirm whether
    # the host cleans its temp dir or add explicit cleanup.
    output_dir = tempfile.mkdtemp(prefix="expense-claim-")
    pdf_path = os.path.join(output_dir, "expense-claim.pdf")
    pdf.save(pdf_path)

    return output['final_output'], pdf_path
# Passed to gr.Interface as `article`: a caption followed by the workflow diagram.
agentic_workflow_representation = 'The agentic workflow used to generate an expense claim document is represented below: \n <img src="https://cdn-uploads.huggingface.co/production/uploads/64118e60756b9e455c7eddd6/iqdGjUG7POKJXLItzWb-K.png">'

# Passed to gr.Interface as `description`: user-facing summary of the workflow.
# Two grammar errors were fixed ("is executed", "uses the Open AI APIs").
agentic_workflow_description = """
This demo represents a multi-agent collaborative workflow that generates an expense claim document based on a submitted bill.
Once a user uploads a bill to the interface, the following steps are executed:
1. Extract text from an image of the bill.
2. Categorize items in the bill to: alcoholic drinks, non-alcoholic drinks and food.
3. Based on the categories, retrieve relevant sections from the company reimbursement policy; remove items that are non reimbursable.
4. Compute the total amount that can be reimbursed (along with taxes).
5. Prepare a claim document in the company format (as a markdown document).
Each step in this workflow is executed by function-calling agents.
For example, the text extractor is an API-calling agent that uses the Open AI APIs to extract text from the bill.
Similarly, the verifier is a search agent that extracts relevant portions of the company policy based on the nature of the bill.
"""
# Single-function UI: one image in, the markdown report and a downloadable PDF out.
demo = gr.Interface(
    fn=claim_generator,
    inputs=gr.Image(type="filepath", label="Upload an image of the bill"),
    outputs=[gr.Markdown(label="Expense Claim Document", show_copy_button=True, container=True),
             gr.File(label="Download your claim document", show_label=True)],
    title="Expense Claim Document Generator",
    description=agentic_workflow_description,
    article=agentic_workflow_representation,
    examples='images',
    cache_examples=False,
    theme=gr.themes.Base(),
    concurrency_limit=16
)

demo.queue()

# Fail fast when the login password is not configured. os.getenv() would hand
# None to Gradio as the password, leaving the login unusable with no clear
# error at startup. The OpenAI settings are likewise required at import time.
_auth_password = os.environ.get('PASSWD')
if not _auth_password:
    raise RuntimeError("The PASSWD environment variable must be set to launch the app.")

demo.launch(auth=("johndoe", _auth_password), ssr_mode=False)