Daemontatox committed on
Commit
7c08af8
·
verified ·
1 Parent(s): 7179ded

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -28
app.py CHANGED
@@ -125,26 +125,80 @@ def process_uploaded_file(file):
125
  return "An error occurred while processing the file. Please try again."
126
 
127
  @spaces.GPU()
128
- def bot_streaming(message, history, max_new_tokens=8192):
129
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  messages = []
131
 
132
- # Process history
133
- for i, msg in enumerate(history):
134
- try:
135
- messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
136
- messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
137
- except Exception as e:
138
- logger.error(f"Error processing history message {i}: {str(e)}")
139
- continue
140
-
141
  # Include document context
142
  if doc_state.current_doc_images:
143
  context = f"\nDocument context:\n{doc_state.current_doc_text}" if doc_state.current_doc_text else ""
144
- current_msg = f"{message}{context}"
145
  messages.append({"role": "user", "content": [{"type": "text", "text": current_msg}, {"type": "image"}]})
146
  else:
147
- messages.append({"role": "user", "content": [{"type": "text", "text": message}]})
148
 
149
  # Process inputs
150
  texts = processor.apply_chat_template(messages, add_generation_prompt=True)
@@ -186,8 +240,8 @@ def clear_context():
186
 
187
  # Create the Gradio interface
188
  with gr.Blocks() as demo:
189
- gr.Markdown("# Document Analyzer with Chat Support")
190
- gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP) and chat about its contents.")
191
 
192
  with gr.Row():
193
  file_upload = gr.File(
@@ -199,22 +253,23 @@ with gr.Blocks() as demo:
199
  interactive=False
200
  )
201
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  clear_btn = gr.Button("Clear Document Context")
203
 
204
- chatbot = gr.ChatInterface(
205
- fn=bot_streaming,
206
- title="Document Chat",
207
- additional_inputs=[
208
- gr.Slider(
209
- minimum=10,
210
- maximum=2048,
211
- value=8192,
212
- step=10,
213
- label="Maximum number of new tokens to generate",
214
- )
215
- ],
216
- stop_btn="Stop Generation",
217
- fill_height=True
218
  )
219
 
220
  file_upload.change(
@@ -223,6 +278,12 @@ with gr.Blocks() as demo:
223
  outputs=[upload_status]
224
  )
225
 
 
 
 
 
 
 
226
  clear_btn.click(
227
  fn=clear_context,
228
  outputs=[upload_status]
 
125
  return "An error occurred while processing the file. Please try again."
126
 
127
  @spaces.GPU()
128
+ def bot_streaming(prompt_option, max_new_tokens=8192):
129
  try:
130
+ # Define predetermined prompts
131
+ prompts = {
132
+ "Timesheet Details (Full Extraction)": (
133
+ "Based on the provided timesheet details, extract the following information:\n"
134
+ " - Full name of the person\n"
135
+ " - Position title of the person\n"
136
+ " - Work location\n"
137
+ " - Contractor's name\n"
138
+ " - NOC ID\n"
139
+ " - Month and year (in MM/YYYY format)\n"
140
+ "And from the bottom table:\n"
141
+ " - Number of service days onshore\n"
142
+ " - Number of standby days onshore in Doha\n"
143
+ " - Number of service days offshore\n"
144
+ " - Number of service days during weekends or public holidays\n"
145
+ " - Number of standby and extended hitch days offshore\n"
146
+ " - Number of extended hitch days onshore for rotational personnel\n"
147
+ " - Number of Per Diem days for onshore/offshore rotational personnel\n"
148
+ " - Number of training days\n"
149
+ " - Number of travel days\n"
150
+ " - Number of offshore days"
151
+ ),
152
+ "Timesheet Details (Basic Extraction)": (
153
+ "Based on the provided timesheet details, extract the following information:\n"
154
+ " - Full name of the person\n"
155
+ " - Position title of the person\n"
156
+ " - Work location\n"
157
+ " - Contractor's name\n"
158
+ " - NOC ID\n"
159
+ " - Month and year (in MM/YYYY format)"
160
+ ),
161
+ "Structured Data Extraction": (
162
+ "You are an advanced data extraction assistant. Your task is to parse structured input text and extract key data points into clearly defined categories. Focus only on the requested details, ensuring accuracy and proper grouping. Below is the format for extracting the data:\n\n"
163
+ "---\n"
164
+ "Project Information\n\n"
165
+ "Project Name:\n\n"
166
+ "Project and Package:\n\n"
167
+ "RPO Number:\n\n"
168
+ "PMC Name:\n\n"
169
+ "Project Location:\n\n"
170
+ "Year:\n\n"
171
+ "Month:\n\n"
172
+ "Timesheet Details\n\n"
173
+ "Week X (Date)\n\n"
174
+ "Holidays:\n\n"
175
+ "Regular Hours:\n\n"
176
+ "Overtime Hours:\n\n"
177
+ "Total Hours:\n\n"
178
+ "Comments:\n\n"
179
+ "Additional Data\n\n"
180
+ "Reviewed By:\n\n"
181
+ "Date of Review:\n\n"
182
+ "Position:\n\n"
183
+ "Supervisor Business:\n\n"
184
+ "Date of Approval:\n\n"
185
+ "---\n\n"
186
+ "Ensure the extracted data strictly follows the format above and is organized by category. Ignore unrelated text. Respond only with the formatted output."
187
+ )
188
+ }
189
+
190
+ # Get the selected prompt
191
+ selected_prompt = prompts.get(prompt_option, "Invalid prompt selected.")
192
+
193
  messages = []
194
 
 
 
 
 
 
 
 
 
 
195
  # Include document context
196
  if doc_state.current_doc_images:
197
  context = f"\nDocument context:\n{doc_state.current_doc_text}" if doc_state.current_doc_text else ""
198
+ current_msg = f"{selected_prompt}{context}"
199
  messages.append({"role": "user", "content": [{"type": "text", "text": current_msg}, {"type": "image"}]})
200
  else:
201
+ messages.append({"role": "user", "content": [{"type": "text", "text": selected_prompt}]})
202
 
203
  # Process inputs
204
  texts = processor.apply_chat_template(messages, add_generation_prompt=True)
 
240
 
241
  # Create the Gradio interface
242
  with gr.Blocks() as demo:
243
+ gr.Markdown("# Document Analyzer with Predetermined Prompts")
244
+ gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP) and select a prompt to analyze its contents.")
245
 
246
  with gr.Row():
247
  file_upload = gr.File(
 
253
  interactive=False
254
  )
255
 
256
+ with gr.Row():
257
+ prompt_dropdown = gr.Dropdown(
258
+ label="Select Prompt",
259
+ choices=[
260
+ "Timesheet Details (Full Extraction)",
261
+ "Timesheet Details (Basic Extraction)",
262
+ "Structured Data Extraction"
263
+ ],
264
+ value="Timesheet Details (Full Extraction)"
265
+ )
266
+ generate_btn = gr.Button("Generate")
267
+
268
  clear_btn = gr.Button("Clear Document Context")
269
 
270
+ output_text = gr.Textbox(
271
+ label="Output",
272
+ interactive=False
 
 
 
 
 
 
 
 
 
 
 
273
  )
274
 
275
  file_upload.change(
 
278
  outputs=[upload_status]
279
  )
280
 
281
+ generate_btn.click(
282
+ fn=bot_streaming,
283
+ inputs=[prompt_dropdown],
284
+ outputs=[output_text]
285
+ )
286
+
287
  clear_btn.click(
288
  fn=clear_context,
289
  outputs=[upload_status]