Ali2206 committed on
Commit
e99ba15
·
verified ·
1 Parent(s): e63e1d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -144
app.py CHANGED
@@ -11,7 +11,7 @@ from datetime import datetime
11
  import time
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
 
14
- # Configuration and setup
15
  persistent_dir = "/data/hf_cache"
16
  os.makedirs(persistent_dir, exist_ok=True)
17
 
@@ -38,10 +38,6 @@ MAX_NEW_TOKENS = 2048
38
  PROMPT_OVERHEAD = 500
39
 
40
  def clean_response(text: str) -> str:
41
- try:
42
- text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
43
- except UnicodeError:
44
- text = text.encode('utf-8', 'replace').decode('utf-8')
45
  text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
46
  text = re.sub(r"\n{3,}", "\n\n", text)
47
  text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
@@ -52,35 +48,28 @@ def estimate_tokens(text: str) -> int:
52
 
53
  def extract_text_from_excel(file_path: str) -> str:
54
  all_text = []
55
- try:
56
- xls = pd.ExcelFile(file_path)
57
- for sheet_name in xls.sheet_names:
58
- df = xls.parse(sheet_name)
59
- df = df.astype(str).fillna("")
60
- rows = df.apply(lambda row: " | ".join(row), axis=1)
61
- sheet_text = [f"[{sheet_name}] {line}" for line in rows]
62
- all_text.extend(sheet_text)
63
- except Exception as e:
64
- raise ValueError(f"Failed to extract text from Excel file: {str(e)}")
65
  return "\n".join(all_text)
66
 
67
  def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
68
- effective_max_tokens = max_tokens - PROMPT_OVERHEAD
69
- if effective_max_tokens <= 0:
70
- raise ValueError("Effective max tokens must be positive.")
71
- lines = text.split("\n")
72
- chunks, current_chunk, current_tokens = [], [], 0
73
  for line in lines:
74
- line_tokens = estimate_tokens(line)
75
- if current_tokens + line_tokens > effective_max_tokens:
76
- if current_chunk:
77
- chunks.append("\n".join(current_chunk))
78
- current_chunk, current_tokens = [line], line_tokens
79
  else:
80
- current_chunk.append(line)
81
- current_tokens += line_tokens
82
- if current_chunk:
83
- chunks.append("\n".join(current_chunk))
84
  return chunks
85
 
86
  def build_prompt_from_text(chunk: str) -> str:
@@ -99,128 +88,86 @@ Analyze the following clinical notes and provide a detailed, concise summary foc
99
  {chunk}
100
 
101
  ---
 
102
  Respond in well-structured bullet points with medical reasoning.
103
  """
104
 
105
  def init_agent():
106
- default_tool_path = os.path.abspath("data/new_tool.json")
107
- target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
108
- if not os.path.exists(target_tool_path):
109
- shutil.copy(default_tool_path, target_tool_path)
110
  agent = TxAgent(
111
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
112
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
113
- tool_files_dict={"new_tool": target_tool_path},
114
  force_finish=True,
115
  enable_checker=True,
116
  step_rag_num=4,
117
- seed=100,
118
- additional_default_tools=[]
119
  )
120
  agent.init_model()
121
  return agent
122
 
123
  def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
124
  messages = chatbot_state if chatbot_state else []
125
- report_path = None
126
-
127
  if file is None or not hasattr(file, "name"):
128
- messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
129
- return messages, report_path
130
-
131
- try:
132
- messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
133
- extracted_text = extract_text_from_excel(file.name)
134
- chunks = split_text_into_chunks(extracted_text)
135
- chunk_responses = [None] * len(chunks)
136
-
137
- def analyze_chunk(index: int, chunk: str) -> Tuple[int, str]:
138
- prompt = build_prompt_from_text(chunk)
139
- prompt_tokens = estimate_tokens(prompt)
140
- if prompt_tokens > MAX_MODEL_TOKENS:
141
- return index, f"❌ Chunk {index+1} prompt too long. Skipping..."
142
- response = ""
143
- try:
144
- for result in agent.run_gradio_chat(
145
- message=prompt,
146
- history=[],
147
- temperature=0.2,
148
- max_new_tokens=MAX_NEW_TOKENS,
149
- max_token=MAX_MODEL_TOKENS,
150
- call_agent=False,
151
- conversation=[],
152
- ):
153
- if isinstance(result, str):
154
- response += result
155
- elif isinstance(result, list):
156
- for r in result:
157
- if hasattr(r, "content"):
158
- response += r.content
159
- elif hasattr(result, "content"):
160
- response += result.content
161
- except Exception as e:
162
- return index, f"❌ Error analyzing chunk {index+1}: {str(e)}"
163
- return index, clean_response(response)
164
-
165
- with ThreadPoolExecutor(max_workers=1) as executor:
166
- futures = [executor.submit(analyze_chunk, i, chunk) for i, chunk in enumerate(chunks)]
167
- for future in as_completed(futures):
168
- i, result = future.result()
169
- chunk_responses[i] = result
170
- if result.startswith("❌"):
171
- messages.append({"role": "assistant", "content": result})
172
-
173
- valid_responses = [res for res in chunk_responses if not res.startswith("❌")]
174
- if not valid_responses:
175
- messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
176
- return messages, report_path
177
-
178
- summary = "\n\n".join(valid_responses)
179
- final_prompt = f"Provide a structured, consolidated clinical analysis from these results:\n\n{summary}"
180
- messages.append({"role": "assistant", "content": "📊 Generating final report..."})
181
-
182
- final_report_text = ""
183
- for result in agent.run_gradio_chat(
184
- message=final_prompt,
185
- history=[],
186
- temperature=0.2,
187
- max_new_tokens=MAX_NEW_TOKENS,
188
- max_token=MAX_MODEL_TOKENS,
189
- call_agent=False,
190
- conversation=[],
191
- ):
192
- if isinstance(result, str):
193
- final_report_text += result
194
- elif isinstance(result, list):
195
- for r in result:
196
  if hasattr(r, "content"):
197
- final_report_text += r.content
198
- elif hasattr(result, "content"):
199
- final_report_text += result.content
200
-
201
- cleaned = clean_response(final_report_text)
202
- report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
203
- with open(report_path, 'w') as f:
204
- f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
205
-
206
- messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{cleaned}"})
207
- messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
208
-
209
- except Exception as e:
210
- messages.append({"role": "assistant", "content": f" Error processing file: {str(e)}"})
211
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  return messages, report_path
213
 
214
  def create_ui(agent):
215
- with gr.Blocks(css="""
216
  html, body, .gradio-container {
217
  height: 100vh;
218
- width: 100vw;
219
- margin: 0;
220
- padding: 0;
221
- font-family: 'Inter', sans-serif;
222
  background-color: #111827;
223
  color: #e5e7eb;
 
224
  }
225
  .gr-button.primary {
226
  background: #2563eb;
@@ -234,30 +181,33 @@ def create_ui(agent):
234
  }
235
  .gr-chatbot {
236
  background-color: #1f2937;
237
- color: #e5e7eb;
238
  border-radius: 10px;
239
  padding: 1rem;
240
- border: 1px solid #374151;
241
  }
242
- .gr-markdown, .gr-file-upload {
243
  background-color: #1f2937;
244
- border-radius: 10px;
245
- box-shadow: 0 0 10px rgba(0,0,0,0.2);
246
  border: 1px solid #374151;
 
247
  }
248
- """) as demo:
249
- gr.Markdown("""
250
- <h2 style='color:#60a5fa'>🩺 Patient History AI Assistant</h2>
251
- <p style='color:#cbd5e1'>Upload a clinical Excel file and receive a structured diagnostic summary.</p>
252
- """)
253
-
254
  with gr.Row():
255
  with gr.Column(scale=3):
256
- chatbot = gr.Chatbot(label="Clinical Assistant", height=700, type="messages")
 
 
 
 
 
 
 
 
257
  with gr.Column(scale=1):
258
- file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
259
- analyze_btn = gr.Button("🧠 Analyze", variant="primary")
260
- report_output = gr.File(label="Download Report", visible=False, interactive=False)
 
261
 
262
  chatbot_state = gr.State(value=[])
263
 
@@ -269,11 +219,11 @@ def create_ui(agent):
269
 
270
  return demo
271
 
272
- if __name__ == "__main__":
273
  try:
274
  agent = init_agent()
275
  demo = create_ui(agent)
276
- demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
277
  except Exception as e:
278
- print(f"Error: {str(e)}")
279
  sys.exit(1)
 
11
  import time
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
 
14
+ # Setup paths
15
  persistent_dir = "/data/hf_cache"
16
  os.makedirs(persistent_dir, exist_ok=True)
17
 
 
38
  PROMPT_OVERHEAD = 500
39
 
40
  def clean_response(text: str) -> str:
 
 
 
 
41
  text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
42
  text = re.sub(r"\n{3,}", "\n\n", text)
43
  text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
 
48
 
49
  def extract_text_from_excel(file_path: str) -> str:
50
  all_text = []
51
+ xls = pd.ExcelFile(file_path)
52
+ for sheet_name in xls.sheet_names:
53
+ df = xls.parse(sheet_name).astype(str).fillna("")
54
+ rows = df.apply(lambda row: " | ".join(row), axis=1)
55
+ sheet_text = [f"[{sheet_name}] {line}" for line in rows]
56
+ all_text.extend(sheet_text)
 
 
 
 
57
  return "\n".join(all_text)
58
 
59
  def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
60
+ effective_max = max_tokens - PROMPT_OVERHEAD
61
+ lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
 
 
 
62
  for line in lines:
63
+ t = estimate_tokens(line)
64
+ if curr_tokens + t > effective_max:
65
+ if curr_chunk:
66
+ chunks.append("\n".join(curr_chunk))
67
+ curr_chunk, curr_tokens = [line], t
68
  else:
69
+ curr_chunk.append(line)
70
+ curr_tokens += t
71
+ if curr_chunk:
72
+ chunks.append("\n".join(curr_chunk))
73
  return chunks
74
 
75
  def build_prompt_from_text(chunk: str) -> str:
 
88
  {chunk}
89
 
90
  ---
91
+
92
  Respond in well-structured bullet points with medical reasoning.
93
  """
94
 
95
  def init_agent():
96
+ tool_path = os.path.join(tool_cache_dir, "new_tool.json")
97
+ if not os.path.exists(tool_path):
98
+ shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
 
99
  agent = TxAgent(
100
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
101
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
102
+ tool_files_dict={"new_tool": tool_path},
103
  force_finish=True,
104
  enable_checker=True,
105
  step_rag_num=4,
106
+ seed=100
 
107
  )
108
  agent.init_model()
109
  return agent
110
 
111
  def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
112
  messages = chatbot_state if chatbot_state else []
 
 
113
  if file is None or not hasattr(file, "name"):
114
+ return messages + [{"role": "assistant", "content": "❌ Please upload a valid Excel file."}], None
115
+
116
+ messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
117
+ text = extract_text_from_excel(file.name)
118
+ chunks = split_text_into_chunks(text)
119
+ chunk_responses = [None] * len(chunks)
120
+
121
+ def analyze_chunk(i, chunk):
122
+ prompt = build_prompt_from_text(chunk)
123
+ response = ""
124
+ for res in agent.run_gradio_chat(message=prompt, history=[], temperature=0.2, max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS, call_agent=False, conversation=[]):
125
+ if isinstance(res, str):
126
+ response += res
127
+ elif hasattr(res, "content"):
128
+ response += res.content
129
+ elif isinstance(res, list):
130
+ for r in res:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  if hasattr(r, "content"):
132
+ response += r.content
133
+ return i, clean_response(response)
134
+
135
+ with ThreadPoolExecutor(max_workers=1) as executor:
136
+ futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
137
+ for f in as_completed(futures):
138
+ i, result = f.result()
139
+ chunk_responses[i] = result
140
+
141
+ valid = [r for r in chunk_responses if r and not r.startswith("❌")]
142
+ if not valid:
143
+ return messages + [{"role": "assistant", "content": "❌ No valid chunk results."}], None
144
+
145
+ summary_prompt = f"Summarize this analysis in a final structured report:\n\n" + "\n\n".join(valid)
146
+ messages.append({"role": "assistant", "content": "📊 Generating final report..."})
147
+
148
+ final_report = ""
149
+ for res in agent.run_gradio_chat(message=summary_prompt, history=[], temperature=0.2, max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS, call_agent=False, conversation=[]):
150
+ if isinstance(res, str):
151
+ final_report += res
152
+ elif hasattr(res, "content"):
153
+ final_report += res.content
154
+
155
+ cleaned = clean_response(final_report)
156
+ report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
157
+ with open(report_path, 'w') as f:
158
+ f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
159
+
160
+ messages.append({"role": "assistant", "content": f"📊 Final Report:\n\n{cleaned}"})
161
+ messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
162
  return messages, report_path
163
 
164
  def create_ui(agent):
165
+ with gr.Blocks(css=\"\"\"
166
  html, body, .gradio-container {
167
  height: 100vh;
 
 
 
 
168
  background-color: #111827;
169
  color: #e5e7eb;
170
+ font-family: 'Inter', sans-serif;
171
  }
172
  .gr-button.primary {
173
  background: #2563eb;
 
181
  }
182
  .gr-chatbot {
183
  background-color: #1f2937;
184
+ border: 1px solid #374151;
185
  border-radius: 10px;
186
  padding: 1rem;
 
187
  }
188
+ .gr-file-upload {
189
  background-color: #1f2937;
 
 
190
  border: 1px solid #374151;
191
+ border-radius: 8px;
192
  }
193
+ \"\"\") as demo:
194
+ gr.Markdown(\"\"\"<h2 style='color:#60a5fa'>🩺 Patient History AI Assistant</h2><p>Upload a clinical Excel file and receive a structured diagnostic summary.</p>\"\"\")
 
 
 
 
195
  with gr.Row():
196
  with gr.Column(scale=3):
197
+ chatbot = gr.Chatbot(
198
+ label=\"Clinical Assistant\",
199
+ height=700,
200
+ type=\"messages\",
201
+ avatar_images=[
202
+ \"https://ui-avatars.com/api/?name=AI&background=2563eb&color=fff&size=128\",
203
+ \"https://ui-avatars.com/api/?name=You&background=374151&color=fff&size=128\"
204
+ ]
205
+ )
206
  with gr.Column(scale=1):
207
+ with gr.Row():
208
+ file_upload = gr.File(label=\"\", file_types=[\".xlsx\"], elem_id=\"upload-btn\")
209
+ analyze_btn = gr.Button(\"🧠 Analyze\", variant=\"primary\")
210
+ report_output = gr.File(label=\"Download Report\", visible=False, interactive=False)
211
 
212
  chatbot_state = gr.State(value=[])
213
 
 
219
 
220
  return demo
221
 
222
+ if __name__ == \"__main__\":
223
  try:
224
  agent = init_agent()
225
  demo = create_ui(agent)
226
+ demo.launch(server_name=\"0.0.0.0\", server_port=7860, allowed_paths=[\"/data/hf_cache/reports\"], share=False)
227
  except Exception as e:
228
+ print(f\"Error: {str(e)}\")
229
  sys.exit(1)