Ali2206 committed on
Commit
59f3278
·
verified ·
1 Parent(s): a1a096d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -48
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import sys, os, json, shutil, re, time, gc, hashlib
2
  import pandas as pd
3
  from datetime import datetime
4
- from concurrent.futures import ThreadPoolExecutor, as_completed
5
  from typing import List, Tuple, Dict, Union
6
 
7
  import gradio as gr
8
 
 
 
9
  # Constants
10
  MAX_MODEL_TOKENS = 131072
11
  MAX_NEW_TOKENS = 4096
@@ -42,14 +43,11 @@ def clean_response(text: str) -> str:
42
 
43
  def extract_text_from_excel(path: str) -> str:
44
  all_text = []
45
- try:
46
- xls = pd.ExcelFile(path)
47
- for sheet in xls.sheet_names:
48
- df = xls.parse(sheet).astype(str).fillna("")
49
- rows = df.apply(lambda row: " | ".join(row), axis=1)
50
- all_text += [f"[{sheet}] {line}" for line in rows]
51
- except Exception as e:
52
- raise ValueError(f"Error reading Excel file: {str(e)}")
53
  return "\n".join(all_text)
54
 
55
  def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
@@ -69,27 +67,12 @@ def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
69
  return chunks
70
 
71
  def build_prompt(chunk: str) -> str:
72
- return f"""### Unstructured Clinical Records
73
-
74
- Analyze the clinical notes below and summarize with:
75
- - Diagnostic Patterns
76
- - Medication Issues
77
- - Missed Opportunities
78
- - Inconsistencies
79
- - Follow-up Recommendations
80
-
81
- ---
82
-
83
- {chunk}
84
-
85
- ---
86
- Respond concisely in bullet points with clinical reasoning."""
87
 
88
  def init_agent() -> TxAgent:
89
  tool_path = os.path.join(tool_cache_dir, "new_tool.json")
90
  if not os.path.exists(tool_path):
91
  shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
92
-
93
  agent = TxAgent(
94
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
95
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -102,15 +85,15 @@ def init_agent() -> TxAgent:
102
  agent.init_model()
103
  return agent
104
 
105
- def analyze_chunks_parallel(agent, chunks: List[str]) -> List[str]:
106
- results = [None] * len(chunks)
107
-
108
- def analyze(i, chunk):
109
  prompt = build_prompt(chunk)
 
 
 
 
110
  try:
111
- if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
112
- return i, f"❌ Chunk {i+1} too long. Skipped."
113
- response = ""
114
  for r in agent.run_gradio_chat(
115
  message=prompt,
116
  history=[],
@@ -129,24 +112,13 @@ def analyze_chunks_parallel(agent, chunks: List[str]) -> List[str]:
129
  elif hasattr(r, "content"):
130
  response += r.content
131
  gc.collect()
132
- return i, clean_response(response)
133
  except Exception as e:
134
- return i, f"❌ Error in chunk {i+1}: {str(e)}"
135
-
136
- with ThreadPoolExecutor(max_workers=4) as executor:
137
- futures = [executor.submit(analyze, i, chunk) for i, chunk in enumerate(chunks)]
138
- for future in as_completed(futures):
139
- i, res = future.result()
140
- results[i] = res
141
-
142
  return results
143
 
144
  def generate_final_summary(agent, combined: str) -> str:
145
- final_prompt = f"""Provide a structured medical report based on the following summaries:
146
-
147
- {combined}
148
-
149
- Respond in detailed medical bullet points."""
150
  full_report = ""
151
  for r in agent.run_gradio_chat(
152
  message=final_prompt,
@@ -178,7 +150,7 @@ def process_report(agent, file, messages: List[Dict[str, str]]) -> Tuple[List[Di
178
  chunks = split_text(extracted)
179
  messages.append({"role": "assistant", "content": f"🔍 Split into {len(chunks)} chunks. Analyzing..."})
180
 
181
- chunk_results = analyze_chunks_parallel(agent, chunks)
182
  valid = [res for res in chunk_results if not res.startswith("❌")]
183
 
184
  if not valid:
@@ -226,4 +198,4 @@ if __name__ == "__main__":
226
  ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
227
  except Exception as err:
228
  print(f"Startup failed: {err}")
229
- sys.exit(1)
 
1
  import sys, os, json, shutil, re, time, gc, hashlib
2
  import pandas as pd
3
  from datetime import datetime
 
4
  from typing import List, Tuple, Dict, Union
5
 
6
  import gradio as gr
7
 
8
+ from concurrent.futures import ThreadPoolExecutor
9
+
10
  # Constants
11
  MAX_MODEL_TOKENS = 131072
12
  MAX_NEW_TOKENS = 4096
 
43
 
44
  def extract_text_from_excel(path: str) -> str:
45
  all_text = []
46
+ xls = pd.ExcelFile(path)
47
+ for sheet in xls.sheet_names:
48
+ df = xls.parse(sheet).astype(str).fillna("")
49
+ rows = df.apply(lambda row: " | ".join(row), axis=1)
50
+ all_text += [f"[{sheet}] {line}" for line in rows]
 
 
 
51
  return "\n".join(all_text)
52
 
53
  def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
 
67
  return chunks
68
 
69
  def build_prompt(chunk: str) -> str:
70
+ return f"""### Unstructured Clinical Records\n\nAnalyze the clinical notes below and summarize with:\n- Diagnostic Patterns\n- Medication Issues\n- Missed Opportunities\n- Inconsistencies\n- Follow-up Recommendations\n\n---\n\n{chunk}\n\n---\nRespond concisely in bullet points with clinical reasoning."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def init_agent() -> TxAgent:
73
  tool_path = os.path.join(tool_cache_dir, "new_tool.json")
74
  if not os.path.exists(tool_path):
75
  shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
 
76
  agent = TxAgent(
77
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
78
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
 
85
  agent.init_model()
86
  return agent
87
 
88
+ def analyze_serial(agent, chunks: List[str]) -> List[str]:
89
+ results = []
90
+ for i, chunk in enumerate(chunks):
 
91
  prompt = build_prompt(chunk)
92
+ if estimate_tokens(prompt) > MAX_MODEL_TOKENS:
93
+ results.append(f"❌ Chunk {i+1} too long. Skipped.")
94
+ continue
95
+ response = ""
96
  try:
 
 
 
97
  for r in agent.run_gradio_chat(
98
  message=prompt,
99
  history=[],
 
112
  elif hasattr(r, "content"):
113
  response += r.content
114
  gc.collect()
115
+ results.append(clean_response(response))
116
  except Exception as e:
117
+ results.append(f"❌ Error in chunk {i+1}: {str(e)}")
 
 
 
 
 
 
 
118
  return results
119
 
120
  def generate_final_summary(agent, combined: str) -> str:
121
+ final_prompt = f"""Provide a structured medical report based on the following summaries:\n\n{combined}\n\nRespond in detailed medical bullet points."""
 
 
 
 
122
  full_report = ""
123
  for r in agent.run_gradio_chat(
124
  message=final_prompt,
 
150
  chunks = split_text(extracted)
151
  messages.append({"role": "assistant", "content": f"🔍 Split into {len(chunks)} chunks. Analyzing..."})
152
 
153
+ chunk_results = analyze_serial(agent, chunks)
154
  valid = [res for res in chunk_results if not res.startswith("❌")]
155
 
156
  if not valid:
 
198
  ui.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
199
  except Exception as err:
200
  print(f"Startup failed: {err}")
201
+ sys.exit(1)