Ali2206 committed on
Commit
f10bfab
·
verified ·
1 Parent(s): b4dbed8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -4
app.py CHANGED
@@ -8,6 +8,7 @@ import time
8
  from datetime import datetime
9
  from typing import List, Tuple, Dict, Union
10
  import pandas as pd
 
11
  import gradio as gr
12
  import torch
13
 
@@ -63,6 +64,42 @@ def extract_text_from_excel(path: str) -> str:
63
  all_text.append(f"[{sheet_name}] {text_line}")
64
  return "\n".join(all_text)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
67
  effective_limit = max_tokens - PROMPT_OVERHEAD
68
  chunks, current, current_tokens = [], [], 0
@@ -129,7 +166,7 @@ def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
129
  time.sleep(SAFE_SLEEP)
130
  except Exception as e:
131
  results.append(f"❌ Batch failed: {str(e)}")
132
- time.sleep(SAFE_SLEEP * 2) # longer sleep on error
133
  torch.cuda.empty_cache()
134
  gc.collect()
135
  return results
@@ -158,12 +195,16 @@ def generate_final_summary(agent, combined: str) -> str:
158
 
159
  def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
160
  if not file or not hasattr(file, "name"):
161
- messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file."})
162
  return messages, None
163
 
164
  messages.append({"role": "user", "content": f"πŸ“‚ Processing file: {os.path.basename(file.name)}"})
165
  try:
166
- extracted = extract_text_from_excel(file.name)
 
 
 
 
167
  chunks = split_text(extracted)
168
  batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
169
  messages.append({"role": "assistant", "content": f"πŸ” Split into {len(batches)} batches. Analyzing..."})
@@ -211,7 +252,7 @@ def create_ui(agent):
211
  """)
212
  with gr.Column():
213
  chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
214
- upload = gr.File(label="Upload Medical File", file_types=[".xlsx"])
215
  analyze = gr.Button("🧠 Analyze")
216
  download = gr.File(label="Download Report", visible=False, interactive=False)
217
 
 
8
  from datetime import datetime
9
  from typing import List, Tuple, Dict, Union
10
  import pandas as pd
11
+ import pdfplumber
12
  import gradio as gr
13
  import torch
14
 
 
64
  all_text.append(f"[{sheet_name}] {text_line}")
65
  return "\n".join(all_text)
66
 
67
def extract_text_from_csv(path: str) -> str:
    """Flatten a CSV file into pipe-delimited text lines.

    Every data row becomes one line made of its non-empty cells joined by
    " | ". A row is kept only when it has at least two non-empty cells and
    the joined line is longer than 15 characters.

    Args:
        path: Filesystem path of the CSV file to read.

    Returns:
        The kept lines joined by newlines, or "" when the file cannot be
        read or no row qualifies.
    """
    try:
        # Read every cell as text and keep blanks as "". Converting with
        # astype(str) after a default parse turns missing cells into the
        # literal string "nan", which then survives the emptiness filter.
        df = pd.read_csv(path, dtype=str, keep_default_na=False).fillna("")
    except Exception:
        # Best-effort extraction: an unreadable file yields no text.
        return ""

    lines = []
    for row in df.itertuples(index=False, name=None):
        cells = [cell.strip() for cell in row]
        non_empty = [cell for cell in cells if cell]
        if len(non_empty) < 2:
            continue
        text_line = " | ".join(non_empty)
        if len(text_line) > 15:
            lines.append(text_line)
    return "\n".join(lines)
80
+
81
def extract_text_from_pdf(path: str) -> str:
    """Extract the text of a PDF, one entry per page, using pdfplumber.

    Pages are read in order. A page that raises during extraction or
    yields no text is skipped, so one bad page does not discard the text
    already collected from the others.

    Args:
        path: Filesystem path of the PDF file to read.

    Returns:
        The stripped text of each readable page joined by newlines, or ""
        when the file cannot be opened or contains no extractable text.
    """
    pages_text = []
    try:
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                try:
                    raw = page.extract_text()
                except Exception:
                    # Skip only the unreadable page; keep the rest.
                    continue
                # Strip before testing so whitespace-only pages do not
                # contribute empty lines to the result.
                cleaned = raw.strip() if raw else ""
                if cleaned:
                    pages_text.append(cleaned)
    except Exception:
        # The file could not be opened or the document broke part-way
        # through; return whatever was gathered (possibly nothing).
        return "\n".join(pages_text)
    return "\n".join(pages_text)
92
+
93
def extract_text(file_path: str) -> str:
    """Extract text from a supported file, chosen by its extension.

    Supported extensions are .xlsx, .csv and .pdf. The match ignores case,
    so "REPORT.XLSX" and "scan.Pdf" are handled like their lowercase forms.

    Args:
        file_path: Path of the uploaded file.

    Returns:
        The text produced by the matching extractor, or "" when the
        extension is not supported.
    """
    # Compare on a lowercased copy only; the original path is what gets
    # passed on, so case-sensitive filesystems still find the file.
    lowered = file_path.lower()
    if lowered.endswith(".xlsx"):
        return extract_text_from_excel(file_path)
    if lowered.endswith(".csv"):
        return extract_text_from_csv(file_path)
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(file_path)
    return ""
102
+
103
  def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
104
  effective_limit = max_tokens - PROMPT_OVERHEAD
105
  chunks, current, current_tokens = [], [], 0
 
166
  time.sleep(SAFE_SLEEP)
167
  except Exception as e:
168
  results.append(f"❌ Batch failed: {str(e)}")
169
+ time.sleep(SAFE_SLEEP * 2)
170
  torch.cuda.empty_cache()
171
  gc.collect()
172
  return results
 
195
 
196
  def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
197
  if not file or not hasattr(file, "name"):
198
+ messages.append({"role": "assistant", "content": "❌ Please upload a valid file."})
199
  return messages, None
200
 
201
  messages.append({"role": "user", "content": f"πŸ“‚ Processing file: {os.path.basename(file.name)}"})
202
  try:
203
+ extracted = extract_text(file.name)
204
+ if not extracted:
205
+ messages.append({"role": "assistant", "content": "❌ Could not extract text."})
206
+ return messages, None
207
+
208
  chunks = split_text(extracted)
209
  batches = batch_chunks(chunks, batch_size=BATCH_SIZE)
210
  messages.append({"role": "assistant", "content": f"πŸ” Split into {len(batches)} batches. Analyzing..."})
 
252
  """)
253
  with gr.Column():
254
  chatbot = gr.Chatbot(label="CPS Assistant", height=700, type="messages")
255
+ upload = gr.File(label="Upload Medical File", file_types=[".xlsx", ".csv", ".pdf"])
256
  analyze = gr.Button("🧠 Analyze")
257
  download = gr.File(label="Download Report", visible=False, interactive=False)
258