Ali2206 committed on
Commit
65e7d58
·
verified ·
1 Parent(s): 422a1e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -55
app.py CHANGED
@@ -96,16 +96,12 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
96
  return f"PDF processing error: {str(e)}"
97
 
98
  def excel_to_json(file_path: str) -> List[Dict]:
99
- """Convert Excel file to JSON with optimized processing"""
100
  try:
101
- # First try with openpyxl (faster for xlsx)
102
  try:
103
  df = pd.read_excel(file_path, engine='openpyxl', header=None, dtype=str)
104
  except Exception:
105
- # Fall back to xlrd if needed
106
  df = pd.read_excel(file_path, engine='xlrd', header=None, dtype=str)
107
 
108
- # Convert to list of lists with null handling
109
  content = df.where(pd.notnull(df), "").astype(str).values.tolist()
110
 
111
  return [{
@@ -118,9 +114,7 @@ def excel_to_json(file_path: str) -> List[Dict]:
118
  return [{"error": f"Error processing Excel file: {str(e)}"}]
119
 
120
  def csv_to_json(file_path: str) -> List[Dict]:
121
- """Convert CSV file to JSON with optimized processing"""
122
  try:
123
- # Read CSV in chunks if large
124
  chunks = []
125
  for chunk in pd.read_csv(
126
  file_path,
@@ -145,7 +139,6 @@ def csv_to_json(file_path: str) -> List[Dict]:
145
  return [{"error": f"Error processing CSV file: {str(e)}"}]
146
 
147
  def process_file(file_path: str, file_type: str) -> List[Dict]:
148
- """Process file based on type and return JSON data"""
149
  try:
150
  if file_type == "pdf":
151
  text = extract_all_pages(file_path)
@@ -166,7 +159,6 @@ def process_file(file_path: str, file_type: str) -> List[Dict]:
166
  return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
167
 
168
  def tokenize_and_chunk(text: str, max_tokens: int = 1800) -> List[str]:
169
- """Split text into chunks based on token count"""
170
  tokens = tokenizer.encode(text)
171
  chunks = []
172
  for i in range(0, len(tokens), max_tokens):
@@ -281,7 +273,6 @@ def init_agent():
281
  return agent
282
 
283
  def process_response_stream(prompt: str, history: List[dict]) -> Generator[dict, None, None]:
284
- """Process a single prompt and stream the response"""
285
  full_response = ""
286
  for chunk_output in agent.run_gradio_chat(prompt, [], 0.2, 512, 2048, False, []):
287
  if chunk_output is None:
@@ -302,20 +293,17 @@ def process_response_stream(prompt: str, history: List[dict]) -> Generator[dict,
302
 
303
  return full_response
304
 
305
- def analyze(message: str, history: List[dict], files: List) -> Generator[Dict[str, Any], None, None]:
306
  # Initialize outputs
307
- outputs = {
308
- "chatbot": history.copy(),
309
- "download_output": None,
310
- "final_summary": "",
311
- "progress_text": {"value": "Starting analysis...", "visible": True}
312
- }
313
 
314
  try:
315
  # Start with user message
316
- history.append({"role": "user", "content": message})
317
- outputs["chatbot"] = history
318
- yield outputs
319
 
320
  extracted = []
321
  file_hash_value = ""
@@ -331,19 +319,16 @@ def analyze(message: str, history: List[dict], files: List) -> Generator[Dict[st
331
  for i, future in enumerate(as_completed(futures), 1):
332
  try:
333
  extracted.extend(future.result())
334
- outputs["progress_text"] = update_progress(i, len(files), "Processing files")
335
- yield outputs
336
  except Exception as e:
337
  logger.error(f"File processing error: {e}")
338
  extracted.append({"error": f"Error processing file: {str(e)}"})
339
 
340
  file_hash_value = file_hash(files[0].name) if files else ""
341
- history.append({"role": "assistant", "content": "✅ File processing complete"})
342
- outputs.update({
343
- "chatbot": history,
344
- "progress_text": update_progress(len(files), len(files), "Files processed")
345
- })
346
- yield outputs
347
 
348
  # Convert extracted data to JSON text
349
  text_content = "\n".join(json.dumps(item) for item in extracted)
@@ -361,23 +346,17 @@ Patient Record Excerpt (Chunk {chunk_idx} of {len(chunks)}):
361
  """
362
 
363
  # Create a placeholder message
364
- history.append({"role": "assistant", "content": ""})
365
- outputs.update({
366
- "chatbot": history,
367
- "progress_text": update_progress(chunk_idx, len(chunks), "Analyzing")
368
- })
369
- yield outputs
370
 
371
  # Process and stream the response
372
  chunk_response = ""
373
- for update in process_response_stream(prompt, history):
374
- history[-1] = update
375
  chunk_response = update["content"]
376
- outputs.update({
377
- "chatbot": history,
378
- "progress_text": update_progress(chunk_idx, len(chunks), "Analyzing")
379
- })
380
- yield outputs
381
 
382
  combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
383
 
@@ -386,28 +365,22 @@ Patient Record Excerpt (Chunk {chunk_idx} of {len(chunks)}):
386
  gc.collect()
387
 
388
  # Generate final summary
389
- summary = summarize_findings(combined_response)
390
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
391
  if report_path:
392
  with open(report_path, "w", encoding="utf-8") as f:
393
- f.write(combined_response + "\n\n" + summary)
394
 
395
- outputs.update({
396
- "download_output": report_path if report_path and os.path.exists(report_path) else None,
397
- "final_summary": summary,
398
- "progress_text": {"visible": False}
399
- })
400
- yield outputs
401
 
402
  except Exception as e:
403
  logger.error("Analysis error: %s", e)
404
- history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
405
- outputs.update({
406
- "chatbot": history,
407
- "final_summary": f"Error occurred during analysis: {str(e)}",
408
- "progress_text": {"visible": False}
409
- })
410
- yield outputs
411
 
412
  def clear_and_start():
413
  return [
@@ -433,6 +406,7 @@ def create_ui(agent):
433
  "assets/user.png",
434
  "assets/assistant.png"
435
  ) if os.path.exists("assets/user.png") else None,
 
436
  render=False
437
  )
438
  with gr.Column(scale=1):
 
96
  return f"PDF processing error: {str(e)}"
97
 
98
  def excel_to_json(file_path: str) -> List[Dict]:
 
99
  try:
 
100
  try:
101
  df = pd.read_excel(file_path, engine='openpyxl', header=None, dtype=str)
102
  except Exception:
 
103
  df = pd.read_excel(file_path, engine='xlrd', header=None, dtype=str)
104
 
 
105
  content = df.where(pd.notnull(df), "").astype(str).values.tolist()
106
 
107
  return [{
 
114
  return [{"error": f"Error processing Excel file: {str(e)}"}]
115
 
116
  def csv_to_json(file_path: str) -> List[Dict]:
 
117
  try:
 
118
  chunks = []
119
  for chunk in pd.read_csv(
120
  file_path,
 
139
  return [{"error": f"Error processing CSV file: {str(e)}"}]
140
 
141
  def process_file(file_path: str, file_type: str) -> List[Dict]:
 
142
  try:
143
  if file_type == "pdf":
144
  text = extract_all_pages(file_path)
 
159
  return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
160
 
161
  def tokenize_and_chunk(text: str, max_tokens: int = 1800) -> List[str]:
 
162
  tokens = tokenizer.encode(text)
163
  chunks = []
164
  for i in range(0, len(tokens), max_tokens):
 
273
  return agent
274
 
275
  def process_response_stream(prompt: str, history: List[dict]) -> Generator[dict, None, None]:
 
276
  full_response = ""
277
  for chunk_output in agent.run_gradio_chat(prompt, [], 0.2, 512, 2048, False, []):
278
  if chunk_output is None:
 
293
 
294
  return full_response
295
 
296
+ def analyze(message: str, history: List[dict], files: List) -> Generator[tuple, None, None]:
297
  # Initialize outputs
298
+ chatbot_output = history.copy()
299
+ download_output = None
300
+ final_summary = ""
301
+ progress_text = {"value": "Starting analysis...", "visible": True}
 
 
302
 
303
  try:
304
  # Start with user message
305
+ chatbot_output.append({"role": "user", "content": message})
306
+ yield (chatbot_output, download_output, final_summary, progress_text)
 
307
 
308
  extracted = []
309
  file_hash_value = ""
 
319
  for i, future in enumerate(as_completed(futures), 1):
320
  try:
321
  extracted.extend(future.result())
322
+ progress_text = update_progress(i, len(files), "Processing files")
323
+ yield (chatbot_output, download_output, final_summary, progress_text)
324
  except Exception as e:
325
  logger.error(f"File processing error: {e}")
326
  extracted.append({"error": f"Error processing file: {str(e)}"})
327
 
328
  file_hash_value = file_hash(files[0].name) if files else ""
329
+ chatbot_output.append({"role": "assistant", "content": "✅ File processing complete"})
330
+ progress_text = update_progress(len(files), len(files), "Files processed")
331
+ yield (chatbot_output, download_output, final_summary, progress_text)
 
 
 
332
 
333
  # Convert extracted data to JSON text
334
  text_content = "\n".join(json.dumps(item) for item in extracted)
 
346
  """
347
 
348
  # Create a placeholder message
349
+ chatbot_output.append({"role": "assistant", "content": ""})
350
+ progress_text = update_progress(chunk_idx, len(chunks), "Analyzing")
351
+ yield (chatbot_output, download_output, final_summary, progress_text)
 
 
 
352
 
353
  # Process and stream the response
354
  chunk_response = ""
355
+ for update in process_response_stream(prompt, chatbot_output):
356
+ chatbot_output[-1] = update
357
  chunk_response = update["content"]
358
+ progress_text = update_progress(chunk_idx, len(chunks), "Analyzing")
359
+ yield (chatbot_output, download_output, final_summary, progress_text)
 
 
 
360
 
361
  combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
362
 
 
365
  gc.collect()
366
 
367
  # Generate final summary
368
+ final_summary = summarize_findings(combined_response)
369
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
370
  if report_path:
371
  with open(report_path, "w", encoding="utf-8") as f:
372
+ f.write(combined_response + "\n\n" + final_summary)
373
 
374
+ download_output = report_path if report_path and os.path.exists(report_path) else None
375
+ progress_text = {"visible": False}
376
+ yield (chatbot_output, download_output, final_summary, progress_text)
 
 
 
377
 
378
  except Exception as e:
379
  logger.error("Analysis error: %s", e)
380
+ chatbot_output.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
381
+ final_summary = f"Error occurred during analysis: {str(e)}"
382
+ progress_text = {"visible": False}
383
+ yield (chatbot_output, download_output, final_summary, progress_text)
 
 
 
384
 
385
  def clear_and_start():
386
  return [
 
406
  "assets/user.png",
407
  "assets/assistant.png"
408
  ) if os.path.exists("assets/user.png") else None,
409
+ type="messages", # Use openai-style messages
410
  render=False
411
  )
412
  with gr.Column(scale=1):