import os
import base64
import json
import gradio as gr
from typing_extensions import TypedDict
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, START, END
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_community.document_loaders import PyPDFLoader
from markdown_pdf import MarkdownPdf, Section
from dotenv import load_dotenv
load_dotenv()
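
# Configuration is read from the environment (typically the local .env file loaded above):
# OPENAI_API_KEY and OPENAI_BASE_URL for the model clients, and PASSWD for the Gradio login.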
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"]
)
llm = ChatOpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_BASE_URL"],
    model="gpt-4o-mini",
    temperature=0
)
employee_name = 'John Doe'
type_of_expense = 'Restaurant'
company_policy_file_path = 'Company Policy on Expense Claims.pdf'
loader = PyPDFLoader(company_policy_file_path)
company_policy_document = loader.load()
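
# PyPDFLoader returns one Document per PDF page; the verifier node below scans these
# pages for the expense type ('Restaurant') to pick out the relevant policy section.
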
class State(TypedDict):
    image_path: str
    extracted_text: str
    categorized_text: str
    relevant_company_policy: str
    verified_text: str
    revised_calculation: str
    final_output: str
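
# The shared workflow state: each node reads the fields it needs and returns a partial
# update (e.g. {'extracted_text': ...}), which LangGraph merges back into this State.
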
def generate_data_uri(jpg_file_path):
    """Reads a JPEG image and returns it as a base64-encoded data URI."""
    with open(jpg_file_path, 'rb') as image_file:
        image_data = image_file.read()
    # Encode the binary image data to base64
    base64_encoded_data = base64.b64encode(image_data).decode('utf-8')
    # Construct the data URI (the input is a JPEG, so use the image/jpeg MIME type)
    data_uri = f"data:image/jpeg;base64,{base64_encoded_data}"
    return data_uri

def text_extractor(state: State):
    """
    Extracts the bill details from an image using OpenAI's GPT-4o mini model.
    """
    text_extraction_system_message = """
    You are an expert in extracting text from images.
    Extract the following details from the bill presented in the input.
    - Date of bill
    - Bill No
    - Restaurant Name and Address
    - Items ordered, quantity and price
    - Tax and Charges
    - Total amount
    Do not output anything except the above details in your output.
    """
    text_extraction_prompt = [
        {
            'role': 'system',
            'content': text_extraction_system_message
        },
        {
            'role': 'user',
            'content': [
                {'type': "image_url", "image_url": {'url': generate_data_uri(state['image_path'])}}
            ]
        }
    ]
    print("I have access to the OpenAI API for text extraction.")
    gr.Info("I have access to the OpenAI API for text extraction. I need to format the input image in the OpenAI format", duration=2)
    print("I need to format the input image in the OpenAI format")
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=text_extraction_prompt,
        temperature=0
    )
    extracted_text = response.choices[0].message.content
    print("Extracted text from the input image")
    gr.Info("Extracted text from the input image", duration=2)
    return {'extracted_text': extracted_text}
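
# The categorizer asks the chat model to bucket each extracted line item into one of three
# fixed categories and to reply with bare JSON (no code fences) so it can be parsed later.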
def categorizer(state: State):
    categorization_system_message = """
    You are an expert accountant tasked to categorize the items ordered in the bill.
    Categorize the items STRICTLY into the following categories: Alcoholic Drinks, Non-Alcoholic Drinks and Food.
    Remember to categorize the items into one of the three categories only. Do not use new categories.
    Present your output as a JSON with the following fields:
    [{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]
    Do not output anything except the above fields in your JSON output.
    Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
    """
    print("Categorizing items in the input text to one of: Alcoholic Drinks, Non-Alcoholic Drinks and Food.")
    gr.Info("Categorizing items in the input text to one of: Alcoholic Drinks, Non-Alcoholic Drinks and Food.", duration=1)
    categorization_prompt = [
        SystemMessage(content=categorization_system_message),
        HumanMessage(content=state['extracted_text'])
    ]
    categorized_text = llm.invoke(categorization_prompt)
    return {'categorized_text': categorized_text.content}
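
# The verifier retrieves the policy page(s) that mention the expense type and asks the
# model to drop any items the policy does not allow to be reimbursed.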
def verifier(state: State):
    print("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable.")
    gr.Info("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable. I will use the search tool to execute this step", duration=2)
    print("I will use the search tool to execute this step.")
    # Simple keyword search: keep the policy page that mentions the expense type
    relevant_company_policy = ''
    for document in company_policy_document:
        if document.page_content.find(f'{type_of_expense}') != -1:
            relevant_company_policy = document.page_content
    verification_system_message = """
    You are an expert accountant tasked to verify the bill details against the provided company policy.
    Verify the items in the submitted bill against the company policy presented below.
    Present your output in the following JSON format after removing the items that are not aligned with the company policy.
    [{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]
    Do not output anything except the above details in your JSON output.
    Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
    """
    verification_prompt = [
        SystemMessage(content=verification_system_message + f"\n Company Policy: \n{relevant_company_policy}"),
        HumanMessage(content=state['categorized_text'])
    ]
    verified_text = llm.invoke(verification_prompt)
    return {'verified_text': verified_text.content, 'relevant_company_policy': relevant_company_policy}
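
# The estimator recomputes the subtotal from the verified items and applies the charge
# and tax rates used below (10% + 2.5% + 2.5% + 20%, i.e. 35% of the subtotal). For
# example, a verified subtotal of 1000.00 yields 350.00 in taxes and charges and a
# reimbursable total of 1350.00.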
def estimator(state: State):
    print("Calculating the revised total amount and taxes")
    gr.Info("Calculating the revised total amount and taxes", duration=1)
    total_bill = 0
    total_taxes_and_charges = 0
    # Sum up the verified (reimbursable) items
    for item in json.loads(state['verified_text']):
        total_bill += float(item['quantity']) * float(item['price'])
    # Apply the tax and charge rates to the reimbursable subtotal
    total_taxes_and_charges = total_bill * 0.10 + total_bill * 0.025 + total_bill * 0.025 + total_bill * 0.20
    revised_calculation = {
        'taxes_and_charges': total_taxes_and_charges,
        'total_amount': total_bill + total_taxes_and_charges
    }
    return {'revised_calculation': revised_calculation}
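
# The formatter prompts the chat model to render the final claim as a markdown report,
# combining the original extracted bill, the verified items and the recomputed totals.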
def formatter(state: State):
    print("Formatting the output into a markdown file")
    gr.Info("Formatting the output into a markdown file", duration=1)
    final_output_system_message = """
    You are an expert accountant tasked to generate the expense claim report.
    Generate the expense claim report based on the calculated total amount to be reimbursed and other details available to you.
    The details of the fields needed for the report are present in the input.
    These are:
    - Employee Name:
    - Original Bill:
    - Verified items ordered, quantity and price:
    - Total amount to be reimbursed:
    - Tax and Charges:
    Use only the details from the input to generate the report.
    Present your output in the following markdown format.
    # Expense Claim Report
    ## Employee Name: <Insert Employee Name>
    ## Date: <Insert Date from original bill>
    ## Bill No: <Insert Bill No from original bill>
    ## Restaurant Name and Address: <Insert Restaurant Name and Address from original bill>
    ## Items ordered, quantity and price (<arrange in a table format from verified list of items>):
    |Item|Quantity|Price|
    ...
    ...
    ### Tax and Charges: <enter the tax amount from calculated amounts>
    ### Total amount to be reimbursed: <enter the total from calculated amounts>
    Do not output anything except the above details in your output.
    Do not delimit the output with any extra tags (e.g., ```).
    """
    formatter_input = f"""
    Employee Name: {employee_name}
    ---
    Original Bill:
    {state['extracted_text']}
    ---
    Verified items ordered, quantity and price:
    {state['verified_text']}
    ---
    Calculated amounts:
    Taxes and Charges: {state['revised_calculation']['taxes_and_charges']}
    Total amount to be reimbursed: {state['revised_calculation']['total_amount']}
    """
    final_output_prompt = [
        SystemMessage(content=final_output_system_message),
        HumanMessage(content=formatter_input)
    ]
    final_output = llm.invoke(final_output_prompt)
    return {'final_output': final_output.content}
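
# claim_generator wires the five nodes into a linear LangGraph pipeline
# (START -> text_extractor -> categorizer -> verifier -> estimator -> formatter -> END),
# runs it on the uploaded bill image, and converts the resulting markdown into a PDF.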
def claim_generator(input_bill_path, progress=gr.Progress()):
    progress(0, desc="Starting workflow")
    workflow = StateGraph(State)
    workflow.add_node("text_extractor", text_extractor)
    workflow.add_node("categorizer", categorizer)
    workflow.add_node("verifier", verifier)
    workflow.add_node("estimator", estimator)
    workflow.add_node("formatter", formatter)
    workflow.add_edge(START, "text_extractor")
    workflow.add_edge("text_extractor", "categorizer")
    workflow.add_edge("categorizer", "verifier")
    workflow.add_edge("verifier", "estimator")
    workflow.add_edge("estimator", "formatter")
    workflow.add_edge("formatter", END)
    chain = workflow.compile()
    progress(0.05)
    output = chain.invoke({'image_path': input_bill_path})
    progress(0.25)
    gr.Info("Converting the markdown file to pdf", duration=2)
    pdf = MarkdownPdf(toc_level=3)
    pdf.add_section(Section(output['final_output']))
    pdf.save("expense-claim.pdf")
    return output['final_output'], 'expense-claim.pdf'
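
# Usage sketch: the Gradio Interface below calls claim_generator with the path of the
# uploaded image (e.g. a hypothetical 'images/sample-bill.jpg') and receives back the
# markdown report plus the path of the generated 'expense-claim.pdf' for download.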
agentic_workflow_representation = 'The agentic workflow used to generate an expense claim document is represented below: \n <img src="https://cdn-uploads.huggingface.co/production/uploads/64118e60756b9e455c7eddd6/iqdGjUG7POKJXLItzWb-K.png">'
agentic_workflow_description = """
This demo represents a multi-agent collaborative workflow that generates an expense claim document based on a submitted bill.
Once a user uploads a bill to the interface, the following steps are executed:
1. Extract text from an image of the bill.
2. Categorize the items in the bill into: alcoholic drinks, non-alcoholic drinks and food.
3. Based on the categories, retrieve relevant sections from the company reimbursement policy; remove items that are not reimbursable.
4. Compute the total amount that can be reimbursed (along with taxes).
5. Prepare a claim document in the company format (as a markdown document).
Each step in this workflow is executed by a function-calling agent.
For example, the text extractor is an API-calling agent that uses the OpenAI API to extract text from the bill.
Similarly, the verifier is a search agent that extracts relevant portions of the company policy based on the nature of the bill.
"""
demo = gr.Interface(
    fn=claim_generator,
    inputs=gr.Image(type="filepath", label="Upload an image of the bill"),
    outputs=[gr.Markdown(label="Expense Claim Document", show_copy_button=True, container=True),
             gr.File(label="Download your claim document", show_label=True)],
    title="Expense Claim Document Generator",
    description=agentic_workflow_description,
    article=agentic_workflow_representation,
    examples='images',
    cache_examples=False,
    theme=gr.themes.Base(),
    concurrency_limit=16
)
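
# queue() enables request queuing for concurrent users; launch() protects the demo with
# a username/password login, reading the password from the PASSWD environment variable.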
demo.queue()
demo.launch(auth=("johndoe", os.getenv('PASSWD')), ssr_mode=False)