Ali2206 committed
Commit 8d40b58 · verified · 1 Parent(s): 96bb421

Update app.py

Files changed (1)
  1. app.py +43 -31
app.py CHANGED
@@ -37,7 +37,7 @@ MAX_NEW_TOKENS = 4096
  MAX_CHUNK_TOKENS = 8192
  BATCH_SIZE = 2
  PROMPT_OVERHEAD = 300
- SAFE_SLEEP = 0.5 # seconds between batches
+ SAFE_SLEEP = 0.5

  # === Utility Functions ===
  def estimate_tokens(text: str) -> int:
@@ -48,6 +48,17 @@ def clean_response(text: str) -> str:
      text = re.sub(r"\n{3,}", "\n\n", text)
      return text.strip()

+ def remove_duplicate_paragraphs(text: str) -> str:
+     paragraphs = text.strip().split("\n\n")
+     seen = set()
+     unique_paragraphs = []
+     for p in paragraphs:
+         clean_p = p.strip()
+         if clean_p and clean_p not in seen:
+             unique_paragraphs.append(clean_p)
+             seen.add(clean_p)
+     return "\n\n".join(unique_paragraphs)
+
  def extract_text_from_excel(path: str) -> str:
      all_text = []
      xls = pd.ExcelFile(path)
@@ -79,6 +90,9 @@ def extract_text_from_csv(path: str) -> str:
      return "\n".join(all_text)

  def extract_text_from_pdf(path: str) -> str:
+     import logging
+     logging.getLogger("pdfminer").setLevel(logging.ERROR)
+
      all_text = []
      try:
          with pdfplumber.open(path) as pdf:
@@ -138,7 +152,6 @@ def init_agent() -> TxAgent:
      agent.init_model()
      return agent

- # === Main Processing ===
  def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
      results = []
      for batch in batches:
@@ -172,7 +185,23 @@ def analyze_batches(agent, batches: List[List[str]]) -> List[str]:
      return results

  def generate_final_summary(agent, combined: str) -> str:
-     final_prompt = f"Provide a structured medical report based on the following summaries:\n\n{combined}\n\nRespond in detailed medical bullet points."
+     combined = remove_duplicate_paragraphs(combined)
+     final_prompt = f"""
+ You are an expert clinical summarizer. Analyze the following summaries carefully and generate a **single final concise structured medical report**, avoiding any repetition or redundancy.
+
+ Summaries:
+ {combined}
+
+ Respond with:
+ - Diagnostic Patterns
+ - Medication Issues
+ - Missed Opportunities
+ - Inconsistencies
+ - Follow-up Recommendations
+
+ Avoid repeating the same points multiple times.
+ """.strip()
+
      final_response = ""
      for r in agent.run_gradio_chat(
          message=final_prompt,
@@ -191,7 +220,10 @@ def generate_final_summary(agent, combined: str) -> str:
                  final_response += m.content
          elif hasattr(r, "content"):
              final_response += r.content
-     return clean_response(final_response)
+
+     final_response = clean_response(final_response)
+     final_response = remove_duplicate_paragraphs(final_response)
+     return final_response

  def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
      if not file or not hasattr(file, "name"):
@@ -231,38 +263,18 @@ def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:

  def create_ui(agent):
      with gr.Blocks(css="""
- html, body, .gradio-container {
-     background: #0e1621;
-     color: #e0e0e0;
-     padding: 16px;
- }
- button.svelte-1ipelgc {
-     background: linear-gradient(to right, #1e88e5, #0d47a1) !important;
-     border: 1px solid #0d47a1 !important;
-     color: white !important;
-     font-weight: bold !important;
-     padding: 10px 20px !important;
-     border-radius: 8px !important;
- }
- button.svelte-1ipelgc:hover {
-     background: linear-gradient(to right, #2196f3, #1565c0) !important;
-     border: 1px solid #1565c0 !important;
- }
- .gr-column {
-     align-items: center !important;
-     gap: 12px;
- }
- .gr-file, .gr-button {
-     width: 100% !important;
-     max-width: 400px;
- }
+ html, body, .gradio-container { background: #0e1621; color: #e0e0e0; padding: 16px; }
+ button.svelte-1ipelgc { background: linear-gradient(to right, #1e88e5, #0d47a1) !important; border: 1px solid #0d47a1 !important; color: white !important; font-weight: bold !important; padding: 10px 20px !important; border-radius: 8px !important; }
+ button.svelte-1ipelgc:hover { background: linear-gradient(to right, #2196f3, #1565c0) !important; border: 1px solid #1565c0 !important; }
+ .gr-column { align-items: center !important; gap: 12px; }
+ .gr-file, .gr-button { width: 100% !important; max-width: 400px; }
  """) as demo:
          gr.Markdown("""
  <h2 style="text-align:center;">📄 CPS: Clinical Patient Support System</h2>
  <p style="text-align:center;">Analyze and summarize unstructured medical files using AI (optimized for A100 GPU).</p>
  """)
          with gr.Column():
-             chatbot = gr.Chatbot(label="🧠 CPS Assistant", height=480, type="messages") # Reduced height
+             chatbot = gr.Chatbot(label="🧠 CPS Assistant", height=480, type="messages")
              upload = gr.File(label="📂 Upload Medical File", file_types=[".xlsx", ".csv", ".pdf"])
              analyze = gr.Button("🧠 Analyze")
              download = gr.File(label="📥 Download Report", visible=False, interactive=False)
@@ -281,4 +293,4 @@ def create_ui(agent):
  if __name__ == "__main__":
      agent = init_agent()
      ui = create_ui(agent)
-     ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
+     ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
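
For readers skimming the diff: the substance of this commit is the new remove_duplicate_paragraphs helper, which generate_final_summary now applies both to the combined batch summaries before building the prompt and to the model's final response before returning it. A minimal standalone sketch of the helper's behaviour, with invented sample text (not taken from the repository):

def remove_duplicate_paragraphs(text: str) -> str:
    # Same logic as the helper added above: keep the first occurrence of each
    # exact (whitespace-stripped) paragraph, drop later repeats.
    paragraphs = text.strip().split("\n\n")
    seen = set()
    unique_paragraphs = []
    for p in paragraphs:
        clean_p = p.strip()
        if clean_p and clean_p not in seen:
            unique_paragraphs.append(clean_p)
            seen.add(clean_p)
    return "\n\n".join(unique_paragraphs)

# Invented example input: two identical paragraphs and one unique one.
sample = "Patient on metformin 500 mg.\n\nPatient on metformin 500 mg.\n\nHbA1c trending upward."
print(remove_duplicate_paragraphs(sample))
# Prints:
# Patient on metformin 500 mg.
#
# HbA1c trending upward.

Only exact paragraph repeats are removed; paraphrased duplicates still pass through, which is presumably why the rewritten prompt also asks the model itself to avoid repetition.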