Update app.py
app.py (CHANGED)
@@ -96,7 +96,7 @@ def process_uploaded_file(file):
             doc_state.doc_type = 'pdf'
             try:
                 doc_state.current_doc_images, doc_state.current_doc_text = process_pdf_file(file_path)
-                return f"PDF processed successfully. Total pages: {len(doc_state.current_doc_images)}. You can now …
+                return f"PDF processed successfully. Total pages: {len(doc_state.current_doc_images)}. You can now chat with the bot."
             except Exception as e:
                 return f"Error processing PDF: {str(e)}. Please try a different PDF file."
         elif file_ext in image_extensions:
@@ -109,7 +109,7 @@ def process_uploaded_file(file):
                 new_size = tuple(int(dim * ratio) for dim in img.size)
                 img = img.resize(new_size, Image.Resampling.LANCZOS)
                 doc_state.current_doc_images = [img]
-                return "Image loaded successfully. You can now …
+                return "Image loaded successfully. You can now chat with the bot."
             except Exception as e:
                 return f"Error processing image: {str(e)}. Please try a different image file."
         else:
@@ -118,161 +118,106 @@ def process_uploaded_file(file):
         logger.error(f"Error in process_uploaded_file: {str(e)}")
         return "An error occurred while processing the file. Please try again."
 
+def clear_context():
+    """Clear the current document context and chat history."""
+    doc_state.clear()
+    return "Document context cleared. You can upload a new document.", []
+
 # -------------------------------
-# …
+# Predetermined Prompts
 # -------------------------------
-…
-    try:
-        # Predetermined prompts (you can adjust these as needed)
-        prompts = {
-            "NOC Timesheet": (
-                """Extract structured information from the provided timesheet. The extracted details should include:
-
-                Name
-
-                Position Title
-
-                Work Location
-
-                Contractor
-
-                NOC ID
-
-                Month and Year
-
-                Regular Service Days (ONSHORE)
-
-                Standby Days (ONSHORE in Doha)
-
-                Offshore Days
-
-                Standby & Extended Hitch Days (OFFSHORE)
-
-                Extended Hitch Days (ONSHORE Rotational)
-
-                Service during Weekends & Public Holidays
-
-                ONSHORE Overtime Hours (Over 8 hours)
-
-                OFFSHORE Overtime Hours (Over 12 hours)
-
-                Per Diem Days (ONSHORE/OFFSHORE Rotational Personnel)
-
-                Training Days
-
-                Travel Days
-
-                Noc representative appoval's name as approved_by
-
-                Noc representative's date approval_date
-
-                Noc representative status as approval_status
-
-                Format the output as valid JSON.
-                """
-            ),
-            "NOC Basic": (
-                "Based on the provided timesheet details, extract the following information:\n"
-                " - Full name\n"
-                " - Position title\n"
-                " - Work location\n"
-                " - Contractor's name\n"
-                " - NOC ID\n"
-                " - Month and year (MM/YYYY)"
-            ),
-            "Aramco Full structured": (
-                """You are a document parsing assistant designed to extract structured data from various documents such as invoices, timesheets, purchase orders, and travel bookings. Return only valid JSON with no extra text.
-                """
-            ),
-            "Aramco Timesheet only": (
-                """Extract time tracking, work details, and approvals.
-                Return a JSON object following the specified structure.
-                """
-            ),
-            "NOC Invoice": (
-                """You are a highly accurate data extraction system. Analyze the provided invoice image and extract all data into the following JSON format:
-{
-    "invoiceDetails": { ... },
-    "from": { ... },
-    "to": { ... },
-    "services": [ ... ],
-    "totals": { ... },
-    "bankDetails": { ... }
+predetermined_prompts = {
+
+    "Software Tester": (
+        "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
+        "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
+        "preconditions. Be as detailed as possible."
+    )
 }
-"""
-            )
-        }
-
-        # Select the appropriate prompt
-        selected_prompt = prompts.get(prompt_option, "Invalid prompt selected.")
-        context = ""
-        if doc_state.current_doc_images and doc_state.current_doc_text:
-            context = "\nDocument context:\n" + doc_state.current_doc_text
-        full_prompt = selected_prompt + context
 
-…
-        # If …
-        if …
+# -------------------------------
+# Chat Function with Streaming and Conversation History
+# -------------------------------
+def chat_respond(user_message, history, prompt_option):
+    """
+    Append the user message (or, if starting a new conversation and no message is provided,
+    use the predetermined prompt) to the conversation history; build the API call using
+    the full conversation history (and the image if available); stream back the assistant response
+    while updating the history.
+
+    The history is a list of [user_text, assistant_text] pairs.
+    """
+    # If this is the first message, add the predetermined prompt text.
+    if history == []:
+        # If user_message is empty, use the predetermined prompt.
+        if not user_message.strip():
+            user_message = predetermined_prompts.get(prompt_option, "Hello")
+        else:
+            # Optionally, prepend the predetermined prompt.
+            user_message = predetermined_prompts.get(prompt_option, "") + "\n" + user_message
+
+    # Append the new user message with an empty assistant response.
+    history = history + [[user_message, ""]]
+
+    # Build the messages list (for the multimodal API) from the conversation history.
+    messages = []
+    for i, (user_msg, assistant_msg) in enumerate(history):
+        # For the user message:
+        user_content = [{"type": "text", "text": user_msg}]
+        # For the very first user message, if an image was uploaded, append the image.
+        if i == 0 and doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
             img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-            # Create a data URI (many APIs accept this format in place of a public URL)
             data_uri = f"data:image/png;base64,{img_b64}"
-            …
+            user_content.append({
                 "type": "image_url",
                 "image_url": {"url": data_uri}
             })
-        …
-        # …
+        messages.append({"role": "user", "content": user_content})
+        # For the assistant response, if available.
+        if assistant_msg:
+            messages.append({
+                "role": "assistant",
+                "content": [{"type": "text", "text": assistant_msg}]
+            })
+
+    # Call the inference API with streaming enabled.
+    try:
         stream = client.chat.completions.create(
-            model="…
+            model="google/gemini-2.0-pro-exp-02-05:free",
             messages=messages,
-            max_tokens=…
+            max_tokens=8192,
             stream=True
         )
-
-        buffer = ""
-        for chunk in stream:
-            # The response structure is similar to the reference: each chunk contains a delta.
-            delta = chunk.choices[0].delta.content
-            buffer += delta
-            time.sleep(0.01)
-            yield buffer
-
     except Exception as e:
-        logger.error(f"Error …
-    …
+        logger.error(f"Error calling the API: {str(e)}")
+        history[-1][1] = "An error occurred while processing your request. Please try again."
+        yield history, history
+
+    # Stream and update the assistant's reply token by token.
+    buffer = ""
+    for chunk in stream:
+        delta = chunk.choices[0].delta.content
+        buffer += delta
+        # Update the assistant part of the latest message in the history.
+        history[-1][1] = buffer
+        # Yield the updated chat history (for the Chatbot component) and the state.
+        yield history, history
+        time.sleep(0.01)
+
+    return history, history
 
 # -------------------------------
 # Create the Gradio Interface
 # -------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Document Analyzer …
-    gr.Markdown(…
+    gr.Markdown("# Document Analyzer & Software Testing Chatbot")
+    gr.Markdown(
+        "Upload a PDF or an image (PNG, JPG, JPEG, GIF, BMP, WEBP). Then choose a prompt from the dropdown. "
+        "For example, select **Software Tester** to have the bot analyze an image of a software interface "
+        "and generate test cases. Chat with the bot in the conversation below."
+    )
 
     with gr.Row():
         file_upload = gr.File(
@@ -284,16 +229,32 @@ with gr.Blocks() as demo:
     with gr.Row():
         prompt_dropdown = gr.Dropdown(
            label="Select Prompt",
-            choices=[
-            …
+            choices=[
+                "Software Tester"
+            ],
+            value="Software Tester"
        )
-        …
+        clear_btn = gr.Button("Clear Document Context & Chat History")
+
+    chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot")
 
-    …
+    with gr.Row():
+        user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", show_label=False)
+        send_btn = gr.Button("Send")
 
-    …
-    clear_btn.click(fn=clear_context, outputs=[upload_status])
+    # State to hold the conversation history
+    chat_state = gr.State([])
 
+    # When a file is uploaded, process it.
+    file_upload.change(fn=process_uploaded_file, inputs=file_upload, outputs=upload_status)
+
+    # Clear both the document context and chat history.
+    clear_btn.click(fn=clear_context, outputs=[upload_status, chat_state])
+
+    # When the user clicks Send, process the message and update the chat.
+    send_btn.click(fn=chat_respond,
+                   inputs=[user_input, chat_state, prompt_dropdown],
+                   outputs=[chatbot, chat_state],
+                   stream=True)
+
 demo.launch(debug=True)
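For context on the image handling in this commit: chat_respond attaches the uploaded page to the first user turn by PNG-encoding the PIL image in memory, base64-encoding the bytes, and wrapping the result in a data: URI placed in an OpenAI-style image_url content part. Below is a minimal, self-contained sketch of that encoding pattern; the placeholder image and prompt text are illustrative, not taken from the Space.

import base64
import io

from PIL import Image


def image_to_data_uri(img: Image.Image) -> str:
    """Encode a PIL image as a base64 PNG data URI, as chat_respond does."""
    buffered = io.BytesIO()
    img.save(buffered, format="PNG")
    img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{img_b64}"


# Placeholder image standing in for the uploaded document page.
page = Image.new("RGB", (64, 64), color="white")

# First user turn: text plus the image as an image_url content part.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Act as a software tester. Analyze the uploaded image ..."},
            {"type": "image_url", "image_url": {"url": image_to_data_uri(page)}},
        ],
    }
]
print(messages[0]["content"][1]["image_url"]["url"][:40])

A data URI avoids having to host the uploaded image at a public URL, which is why chat_respond builds one before calling the chat completions API.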
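The new chat_respond handler is a Python generator, and Gradio streams each yielded value to the declared outputs; that is what makes the Chatbot update token by token. A minimal, self-contained sketch of the same wiring, with a canned reply standing in for the model call (component and function names here are illustrative, not from the Space):

import time

import gradio as gr


def chat_stream(user_message, history):
    """Toy generator handler: Gradio pushes every yielded value to the outputs."""
    # Pair-style history ([user, assistant]) to match the diff; newer Gradio may prefer type="messages".
    history = history + [[user_message, ""]]
    reply = "This is a streamed placeholder reply."
    buffer = ""
    for token in reply.split(" "):
        buffer += token + " "
        history[-1][1] = buffer
        # Each yield refreshes both the Chatbot display and the State.
        yield history, history
        time.sleep(0.05)


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Chat History")
    state = gr.State([])
    msg = gr.Textbox(label="Your Message")
    send = gr.Button("Send")
    # No extra flag is needed: a generator handler streams automatically.
    send.click(fn=chat_stream, inputs=[msg, state], outputs=[chatbot, state])

if __name__ == "__main__":
    demo.launch()

In the Gradio releases I am aware of, Button.click() takes no stream parameter; streaming is implied by the handler being a generator, so the stream=True keyword passed to send_btn.click() in the diff may need to be dropped depending on the installed Gradio version.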