Ali2206 committed on
Commit
6e39ead
·
verified ·
1 Parent(s): affa0af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -89
app.py CHANGED
@@ -34,28 +34,24 @@ from txagent.txagent import TxAgent
34
 
35
  # Constants
36
  MAX_TOKENS = 32768
37
- CHUNK_SIZE = 10000
38
  MAX_NEW_TOKENS = 2048
39
- MAX_BOOKINGS_PER_CHUNK = 5
40
 
41
- def file_hash(path: str) -> str:
42
- with open(path, "rb") as f:
43
- return hashlib.md5(f.read()).hexdigest()
44
 
45
  def clean_response(text: str) -> str:
46
  try:
47
  text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
48
  except UnicodeError:
49
  text = text.encode('utf-8', 'replace').decode('utf-8')
50
-
51
  text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
52
  text = re.sub(r"\n{3,}", "\n\n", text)
53
  text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
54
  return text.strip()
55
 
 
56
  def estimate_tokens(text: str) -> int:
57
  return len(text) // 3.5
58
 
 
59
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
60
  data = {
61
  'bookings': defaultdict(list),
@@ -66,7 +62,7 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
66
  'doctors': set(),
67
  'timeline': []
68
  }
69
-
70
  df = df.sort_values('Interview Date')
71
  for booking, group in df.groupby('Booking Number'):
72
  for _, row in group.iterrows():
@@ -79,11 +75,11 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
79
  'response': str(row['Item Response']),
80
  'notes': str(row['Description'])
81
  }
82
-
83
  data['bookings'][booking].append(entry)
84
  data['timeline'].append(entry)
85
  data['doctors'].add(entry['doctor'])
86
-
87
  form_lower = entry['form'].lower()
88
  if 'medication' in form_lower or 'drug' in form_lower:
89
  data['medications'][entry['item']].append(entry)
@@ -93,9 +89,10 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
93
  data['tests'][entry['item']].append(entry)
94
  elif 'procedure' in form_lower or 'surgery' in form_lower:
95
  data['procedures'][entry['item']].append(entry)
96
-
97
  return data
98
 
 
99
  def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
100
  prompt_lines = [
101
  "### Patient Clinical Reasoning Task",
@@ -138,33 +135,14 @@ def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str])
138
 
139
  return "\n".join(prompt_lines)
140
 
141
- def chunk_bookings(patient_data: Dict[str, Any]) -> List[List[str]]:
142
- all_bookings = list(patient_data['bookings'].keys())
143
- booking_sizes = []
144
-
145
- for booking in all_bookings:
146
- entries = patient_data['bookings'][booking]
147
- size = sum(estimate_tokens(str(e)) for e in entries)
148
- booking_sizes.append((booking, size))
149
-
150
- booking_sizes.sort(key=lambda x: x[1], reverse=True)
151
- chunks = [[] for _ in range(3)]
152
- chunk_sizes = [0, 0, 0]
153
-
154
- for booking, size in booking_sizes:
155
- min_chunk = chunk_sizes.index(min(chunk_sizes))
156
- chunks[min_chunk].append(booking)
157
- chunk_sizes[min_chunk] += size
158
-
159
- return chunks
160
 
161
  def init_agent():
162
  default_tool_path = os.path.abspath("data/new_tool.json")
163
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
164
-
165
  if not os.path.exists(target_tool_path):
166
  shutil.copy(default_tool_path, target_tool_path)
167
-
168
  agent = TxAgent(
169
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
170
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -178,6 +156,7 @@ def init_agent():
178
  agent.init_model()
179
  return agent
180
 
 
181
  def analyze_with_agent(agent, prompt: str) -> str:
182
  try:
183
  response = ""
@@ -198,11 +177,11 @@ def analyze_with_agent(agent, prompt: str) -> str:
198
  response += clean_response(result) + "\n"
199
  elif hasattr(result, 'content'):
200
  response += clean_response(result.content) + "\n"
201
-
202
  return response.strip()
203
  except Exception as e:
204
  return f"Error in analysis: {str(e)}"
205
 
 
206
  def analyze(file):
207
  if not file:
208
  raise gr.Error("Please upload a file")
@@ -212,79 +191,65 @@ def analyze(file):
212
  patient_data = process_patient_data(df)
213
  all_bookings = list(patient_data['bookings'].keys())
214
 
215
- # Build one full prompt with all bookings
216
- prompt = generate_analysis_prompt(patient_data, all_bookings)
 
 
217
 
218
- # Add holistic reasoning instruction to prompt
219
- prompt += "\n\n" + "\n".join([
220
- "**Please analyze the entire patient history across all bookings.**",
221
- "Look for missed diagnoses, inconsistent notes across different doctors,",
222
- "missing follow-ups, or any gaps in care delivery.",
223
- "Provide detailed insight into what may have been overlooked."
224
- ])
 
 
 
 
 
225
 
226
- # ✅ Run the agent once over the full patient timeline
227
- response = analyze_with_agent(agent, prompt)
228
 
229
- # Wrap and save the result
230
- full_report = f"# 🧠 Full Patient History Analysis\n\n{response}"
 
 
 
 
 
231
 
232
- report_path = os.path.join(
233
- report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
234
- )
 
 
235
  with open(report_path, 'w') as f:
236
  f.write(full_report)
237
 
238
- yield full_report, report_path
239
 
240
  except Exception as e:
241
  raise gr.Error(f"Error: {str(e)}")
242
 
 
243
  def create_ui(agent):
244
- with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
245
- gr.Markdown("# 🏥 Patient History Analyzer")
246
-
247
- with gr.Tabs():
248
- with gr.TabItem("Analysis"):
249
- with gr.Row():
250
- with gr.Column(scale=1):
251
- file_upload = gr.File(
252
- label="Upload Excel File",
253
- file_types=[".xlsx"],
254
- file_count="single"
255
- )
256
- analyze_btn = gr.Button("Analyze", variant="primary")
257
- status = gr.Markdown("Ready")
258
-
259
- with gr.Column(scale=2):
260
- output = gr.Markdown()
261
- report = gr.File(label="Download Report")
262
-
263
- with gr.TabItem("Instructions"):
264
- gr.Markdown("""
265
- ## How to Use
266
- 1. Upload patient history Excel
267
- 2. Click Analyze
268
- 3. View/download report
269
-
270
- **Required Columns:**
271
- - Booking Number
272
- - Interview Date
273
- - Interviewer
274
- - Form Name
275
- - Form Item
276
- - Item Response
277
- - Description
278
- """)
279
-
280
  analyze_btn.click(
281
  analyze,
282
- inputs=file_upload,
283
- outputs=[output, report]
284
  )
285
-
286
  return demo
287
 
 
288
  if __name__ == "__main__":
289
  try:
290
  agent = init_agent()
@@ -297,4 +262,4 @@ if __name__ == "__main__":
297
  )
298
  except Exception as e:
299
  print(f"Error: {str(e)}")
300
- sys.exit(1)
 
34
 
35
  # Constants
36
  MAX_TOKENS = 32768
 
37
  MAX_NEW_TOKENS = 2048
 
38
 
 
 
 
39
 
40
def clean_response(text: str) -> str:
    """Normalise raw agent output into plain, Markdown-friendly text.

    Processing order:

    1. Characters that cannot be encoded as UTF-8 (lone surrogates) are
       replaced with ``?``.
    2. Every ``[...]`` span (which may cross line breaks) and every
       standalone word ``None`` is removed.
    3. Runs of characters outside the whitelist are removed. The whitelist
       is word characters, whitespace, and ``# - * . , : ( )``.
    4. Three or more consecutive newlines are collapsed to one blank line.
    5. Leading and trailing whitespace is stripped.

    Args:
        text: Raw text produced by the agent. ``None`` or an empty string
            is accepted and yields ``""``.

    Returns:
        The cleaned text.
    """
    if not text:
        return ""

    # A single 'replace' pass covers both cases: ordinary text round-trips
    # unchanged, and lone surrogates become '?'.
    text = text.encode("utf-8", "replace").decode("utf-8")

    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
    text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)

    # Collapse blank-line runs last: the removals above can empty out whole
    # lines, which would otherwise leave runs of three or more newlines.
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
49
 
50
+
51
def estimate_tokens(text: str) -> int:
    """Return a rough token count for *text*, assuming 3.5 characters per token.

    The result is floored and always an ``int``; floor division by a float
    alone would yield a ``float`` such as ``2.0``.
    """
    return int(len(text) // 3.5)
53
 
54
+
55
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
56
  data = {
57
  'bookings': defaultdict(list),
 
62
  'doctors': set(),
63
  'timeline': []
64
  }
65
+
66
  df = df.sort_values('Interview Date')
67
  for booking, group in df.groupby('Booking Number'):
68
  for _, row in group.iterrows():
 
75
  'response': str(row['Item Response']),
76
  'notes': str(row['Description'])
77
  }
78
+
79
  data['bookings'][booking].append(entry)
80
  data['timeline'].append(entry)
81
  data['doctors'].add(entry['doctor'])
82
+
83
  form_lower = entry['form'].lower()
84
  if 'medication' in form_lower or 'drug' in form_lower:
85
  data['medications'][entry['item']].append(entry)
 
89
  data['tests'][entry['item']].append(entry)
90
  elif 'procedure' in form_lower or 'surgery' in form_lower:
91
  data['procedures'][entry['item']].append(entry)
92
+
93
  return data
94
 
95
+
96
  def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
97
  prompt_lines = [
98
  "### Patient Clinical Reasoning Task",
 
135
 
136
  return "\n".join(prompt_lines)
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  def init_agent():
140
  default_tool_path = os.path.abspath("data/new_tool.json")
141
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
142
+
143
  if not os.path.exists(target_tool_path):
144
  shutil.copy(default_tool_path, target_tool_path)
145
+
146
  agent = TxAgent(
147
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
148
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
 
156
  agent.init_model()
157
  return agent
158
 
159
+
160
  def analyze_with_agent(agent, prompt: str) -> str:
161
  try:
162
  response = ""
 
177
  response += clean_response(result) + "\n"
178
  elif hasattr(result, 'content'):
179
  response += clean_response(result.content) + "\n"
 
180
  return response.strip()
181
  except Exception as e:
182
  return f"Error in analysis: {str(e)}"
183
 
184
+
185
  def analyze(file):
186
  if not file:
187
  raise gr.Error("Please upload a file")
 
191
  patient_data = process_patient_data(df)
192
  all_bookings = list(patient_data['bookings'].keys())
193
 
194
+ # Chunking logic based on estimated token limits
195
+ chunks = []
196
+ current_chunk = []
197
+ current_size = 0
198
 
199
+ for booking in all_bookings:
200
+ booking_entries = patient_data['bookings'][booking]
201
+ booking_prompt = generate_analysis_prompt(patient_data, [booking])
202
+ token_count = estimate_tokens(booking_prompt)
203
+ if current_size + token_count > MAX_TOKENS:
204
+ if current_chunk:
205
+ chunks.append(current_chunk)
206
+ current_chunk = [booking]
207
+ current_size = token_count
208
+ else:
209
+ current_chunk.append(booking)
210
+ current_size += token_count
211
 
212
+ if current_chunk:
213
+ chunks.append(current_chunk)
214
 
215
+ chunk_responses = []
216
+ for chunk in chunks:
217
+ prompt = generate_analysis_prompt(patient_data, chunk) + "\n\n" + "\n".join([
218
+ "**Please analyze this part of the patient history.**",
219
+ "Focus on identifying patterns, issues, and possible missed opportunities."
220
+ ])
221
+ chunk_responses.append(analyze_with_agent(agent, prompt))
222
 
223
+ final_prompt = "\n\n".join(chunk_responses) + "\n\nSummarize the key insights, missed diagnoses, medication issues, inconsistencies and follow-up recommendations in a clear and structured way."
224
+ final_response = analyze_with_agent(agent, final_prompt)
225
+ full_report = f"# \U0001f9e0 Full Patient History Analysis\n\n{final_response}"
226
+
227
+ report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
228
  with open(report_path, 'w') as f:
229
  f.write(full_report)
230
 
231
+ return [("user", "[Excel Uploaded: Processing Analysis...]"), ("assistant", full_report)], report_path
232
 
233
  except Exception as e:
234
  raise gr.Error(f"Error: {str(e)}")
235
 
236
+
237
def create_ui(agent):
    """Assemble the Gradio Blocks interface and return it.

    The layout has a chatbot panel, an ``.xlsx`` upload control, an analyse
    button and a file component for the generated report. Clicking the
    button calls the module-level ``analyze`` with the uploaded file and
    routes its two results to the chatbot and the report component.

    Args:
        agent: Not referenced inside this function.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    with gr.Blocks(title="Patient History Chat") as demo:
        history_view = gr.Chatbot(label="Clinical Assistant", show_copy_button=True)
        upload_box = gr.File(label="Upload Excel File", file_types=[".xlsx"])
        run_button = gr.Button("🧠 Analyze Patient History")
        report_file = gr.File(label="Download Report")

        # NOTE(review): nesting reconstructed from a diff view that lost
        # indentation; the click wiring is assumed to sit inside the
        # Blocks context. Confirm against the real file.
        run_button.click(analyze, inputs=[upload_box], outputs=[history_view, report_file])

    return demo
251
 
252
+
253
  if __name__ == "__main__":
254
  try:
255
  agent = init_agent()
 
262
  )
263
  except Exception as e:
264
  print(f"Error: {str(e)}")
265
+ sys.exit(1)