Ali2206 committed
Commit a57b988 · verified · 1 Parent(s): 0e6914c

Update app.py

Files changed (1):
  1. app.py +129 -124

app.py CHANGED
@@ -1,12 +1,14 @@
 import sys
 import os
 import pandas as pd
+import json
 import gradio as gr
-from typing import List, Tuple
+from typing import List, Tuple, Dict, Any, Union
+import hashlib
+import shutil
 import re
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
-import copy

 # Setup directories
 persistent_dir = "/data/hf_cache"
@@ -14,9 +16,10 @@ os.makedirs(persistent_dir, exist_ok=True)

 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
+file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")

-for d in [model_cache_dir, tool_cache_dir, report_dir]:
+for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
     os.makedirs(d, exist_ok=True)

 os.environ["HF_HOME"] = model_cache_dir
@@ -36,6 +39,9 @@ def clean_response(text: str) -> str:
     text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
     return text.strip()

+def estimate_tokens(text: str) -> int:
+    return len(text) // 3.5 + 1
+
 def extract_text_from_excel(file_path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(file_path)
@@ -46,116 +52,121 @@ def extract_text_from_excel(file_path: str) -> str:
         all_text.extend(sheet_text)
     return "\n".join(all_text)

-def split_text_into_chunks(text: str) -> List[str]:
-    effective_max = MAX_CHUNK_TOKENS - PROMPT_OVERHEAD
-    lines, chunks, curr_chunk = text.split("\n"), [], []
-    curr_tokens = sum(len(line.split()) for line in curr_chunk)
-
+def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
+    effective_max = max_tokens - PROMPT_OVERHEAD
+    lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
     for line in lines:
-        line_tokens = len(line.split())
-        if curr_tokens + line_tokens > effective_max:
+        t = estimate_tokens(line)
+        if curr_tokens + t > effective_max:
             if curr_chunk:
                 chunks.append("\n".join(curr_chunk))
-            curr_chunk, curr_tokens = [line], line_tokens
+            if len(chunks) >= max_chunks:
+                break
+            curr_chunk, curr_tokens = [line], t
         else:
             curr_chunk.append(line)
-            curr_tokens += line_tokens
-    if curr_chunk:
+            curr_tokens += t
+    if curr_chunk and len(chunks) < max_chunks:
         chunks.append("\n".join(curr_chunk))
     return chunks

 def build_prompt_from_text(chunk: str) -> str:
-    return f"""Analyze these clinical notes and provide:
-- Diagnostic patterns
-- Medication issues
-- Missed opportunities
-- Inconsistencies
-- Follow-up recommendations
-
-Respond with clear bullet points:
-
-{chunk}"""
-
+    return f"""
+### Unstructured Clinical Records
+
+Analyze the following clinical notes and provide a detailed, concise summary focusing on:
+- Diagnostic Patterns
+- Medication Issues
+- Missed Opportunities
+- Inconsistencies
+- Follow-up Recommendations
+
+---
+
+{chunk}
+
+---
+Respond in well-structured bullet points with medical reasoning.
+"""
+
-class AgentWrapper:
-    def __init__(self):
-        self.agent = None
-
-    def init_agent(self):
-        tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-        if not os.path.exists(tool_path):
-            import shutil
-            shutil.copy("data/new_tool.json", tool_path)
-        self.agent = TxAgent(
-            model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-            rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-            tool_files_dict={"new_tool": tool_path},
-            force_finish=True,
-            enable_checker=True,
-            step_rag_num=4,
-            seed=100
-        )
-        self.agent.init_model()
-        return self.agent
-
+def init_agent():
+    tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(tool_path):
+        shutil.copy(os.path.abspath("data/new_tool.json"), tool_path)
+    agent = TxAgent(
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": tool_path},
+        force_finish=True,
+        enable_checker=True,
+        step_rag_num=4,
+        seed=100
+    )
+    agent.init_model()
+    return agent
+
-def process_final_report(file, chatbot_state: List[Tuple[str, str]], agent: TxAgent):
-    messages = copy.deepcopy(chatbot_state) if chatbot_state else []
-
-    if file is None:
-        messages.append(("assistant", "❌ Please upload a valid Excel file."))
-        return messages, None
-
-    messages.append(("user", f"Processing Excel file: {os.path.basename(file.name)}"))
-    yield messages, None
-
-    try:
-        text = extract_text_from_excel(file.name)
-        chunks = split_text_into_chunks(text)
-
-        messages.append(("assistant", "🔍 Analyzing clinical data..."))
-        yield messages, None
-
-        full_report = []
-        for i, chunk in enumerate(chunks, 1):
-            prompt = build_prompt_from_text(chunk)
-            response = ""
-
-            for res in agent.run_gradio_chat(
-                message=prompt, history=[], temperature=0.2,
-                max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
-                call_agent=False, conversation=[]
-            ):
-                if isinstance(res, str):
-                    response += res
-                elif hasattr(res, "content"):
-                    response += res.content
-
-            cleaned = clean_response(response)
-            full_report.append(cleaned)
-
-            progress_msg = f"✅ Analyzed section {i}/{len(chunks)}"
-            if len(messages) > 2 and "Analyzed section" in messages[-1][1]:
-                messages[-1] = ("assistant", progress_msg)
-            else:
-                messages.append(("assistant", progress_msg))
-            yield messages, None
-
-        final_report = "## 🧠 Final Clinical Report\n\n" + "\n\n".join(full_report)
-        report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
-        with open(report_path, 'w') as f:
-            f.write(final_report)
-
-        messages.append(("assistant", f"✅ Report generated and saved: {os.path.basename(report_path)}"))
-        messages.append(("assistant", final_report))
-        yield messages, report_path
-
-    except Exception as e:
-        messages.append(("assistant", f"❌ Error: {str(e)}"))
-        yield messages, None
-
+def process_final_report(agent, file, chatbot_state: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], Union[str, None], str]:
+    messages = chatbot_state if chatbot_state else []
+    if file is None or not hasattr(file, "name"):
+        return messages + [("assistant", "❌ Please upload a valid Excel file.")], None, ""
+
+    messages.append(("user", f"📎 Uploaded file: {os.path.basename(file.name)}"))
+    text = extract_text_from_excel(file.name)
+    chunks = split_text_into_chunks(text)
+    chunk_responses = [None] * len(chunks)
+
+    def analyze_chunk(i, chunk):
+        prompt = build_prompt_from_text(chunk)
+        response = ""
+        for res in agent.run_gradio_chat(
+            message=prompt, history=[], temperature=0.2,
+            max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
+            call_agent=False, conversation=[]
+        ):
+            if isinstance(res, str):
+                response += res
+            elif hasattr(res, "content"):
+                response += res.content
+            elif isinstance(res, list):
+                for r in res:
+                    if hasattr(r, "content"):
+                        response += r.content
+        return i, clean_response(response)
+
+    with ThreadPoolExecutor(max_workers=1) as executor:
+        futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
+        for f in as_completed(futures):
+            i, result = f.result()
+            chunk_responses[i] = result
+
+    valid = [r for r in chunk_responses if r and not r.startswith("❌")]
+    if not valid:
+        return messages + [("assistant", "❌ No valid results found in the file.")], None, ""
+
+    summary_prompt = f"Summarize this analysis in a final structured report:\n\n" + "\n\n".join(valid)
+    messages.append(("assistant", "⏳ Generating the final report..."))
+
+    final_report = ""
+    for res in agent.run_gradio_chat(
+        message=summary_prompt, history=[], temperature=0.2,
+        max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
+        call_agent=False, conversation=[]
+    ):
+        if isinstance(res, str):
+            final_report += res
+        elif hasattr(res, "content"):
+            final_report += res.content
+
+    cleaned = clean_response(final_report)
+    messages.append(("assistant", cleaned))  # ✅ Append answer to chat
+
+    report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
+    with open(report_path, 'w') as f:
+        f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
+
+    return messages, report_path, cleaned
+
-def create_ui():
-    agent_wrapper = AgentWrapper()
-    agent = agent_wrapper.init_agent()
-
+def create_ui(agent):
     with gr.Blocks(css="""
     body {
         background: #10141f;
@@ -192,39 +203,33 @@ def create_ui():
         background: linear-gradient(135deg, #37b6e9, #4b4ced);
     }
     """) as demo:
-        gr.Markdown("""# Clinical Reasoning Assistant
+        gr.Markdown("""# 🧠 Clinical Reasoning Assistant
 Upload clinical Excel records below and click **Analyze** to generate a medical summary.
 """)
-
-        with gr.Row():
-            file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
-            analyze_btn = gr.Button("Analyze", variant="primary")
-
-        chatbot = gr.Chatbot(label="Chatbot", elem_classes="chatbot", type="messages")
+        chatbot = gr.Chatbot(label="Chatbot", elem_classes="chatbot", type="tuples")
+        report_output_markdown = gr.Markdown(visible=False)
+        file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+        analyze_btn = gr.Button("Analyze")
         report_output = gr.File(label="Download Report", visible=False)
-        chatbot_state = gr.State([])
+        chatbot_state = gr.State(value=[])

-        def wrapped_process(file, state):
-            yield from process_final_report(file, state, agent)
+        def update_ui(file, current_state):
+            messages, report_path, final_text = process_final_report(agent, file, current_state)
+            return messages, gr.update(visible=report_path is not None, value=report_path), messages, gr.update(visible=True, value=final_text)

         analyze_btn.click(
-            fn=wrapped_process,
+            fn=update_ui,
             inputs=[file_upload, chatbot_state],
-            outputs=[chatbot, report_output],
-            show_progress="hidden"
+            outputs=[chatbot, report_output, chatbot_state, report_output_markdown]
         )

         return demo

 if __name__ == "__main__":
     try:
-        demo = create_ui()
-        demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            allowed_paths=["/data/hf_cache/reports"],
-            share=False
-        )
+        agent = init_agent()
+        demo = create_ui(agent)
+        demo.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/data/hf_cache/reports"], share=False)
     except Exception as e:
         print(f"Error: {str(e)}")
-    sys.exit(1)
+        sys.exit(1)
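A few notes on the changed code. The new chunking logic replaces the old word-count budget with a character-based token estimate (roughly 3.5 characters per token) and adds a hard cap of 30 chunks. Below is a standalone sketch of that behaviour so it can be exercised in isolation; MAX_CHUNK_TOKENS and PROMPT_OVERHEAD are defined elsewhere in app.py and the values here are placeholders. Note also that the committed estimate_tokens returns len(text) // 3.5 + 1, which is a float despite the int annotation; the sketch coerces with int() instead.

# Standalone sketch of the new chunking behaviour (illustrative only).
from typing import List

MAX_CHUNK_TOKENS = 8192  # placeholder; the real value is set elsewhere in app.py
PROMPT_OVERHEAD = 500    # placeholder

def estimate_tokens(text: str) -> int:
    # ~3.5 characters per token; int() keeps the annotated return type honest
    return int(len(text) / 3.5) + 1

def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS,
                           max_chunks: int = 30) -> List[str]:
    effective_max = max_tokens - PROMPT_OVERHEAD
    chunks, curr_chunk, curr_tokens = [], [], 0
    for line in text.split("\n"):
        t = estimate_tokens(line)
        if curr_tokens + t > effective_max:
            if curr_chunk:
                chunks.append("\n".join(curr_chunk))
            if len(chunks) >= max_chunks:  # hard cap on the number of chunks
                break
            # NB: a single line longer than the budget still becomes its own oversized chunk
            curr_chunk, curr_tokens = [line], t
        else:
            curr_chunk.append(line)
            curr_tokens += t
    if curr_chunk and len(chunks) < max_chunks:
        chunks.append("\n".join(curr_chunk))
    return chunks

# Greedy line packing: each chunk holds as many whole lines as fit the budget.
print(split_text_into_chunks("x" * 10 + "\n" + "y" * 10, max_tokens=PROMPT_OVERHEAD + 4))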
 
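process_final_report now fans chunk analysis out through a ThreadPoolExecutor, but with max_workers=1, so chunks still run one at a time (presumably so the single model instance is never called concurrently). Because as_completed() yields futures in completion order, each task returns an (index, result) pair so results can be slotted back into their original positions. A minimal sketch of that pattern, with a stand-in for the model call:

# Index-tagged fan-out: completion order may differ from submission order,
# so each task carries its index home.
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Tuple

def analyze_chunk(i: int, chunk: str) -> Tuple[int, str]:
    return i, chunk.upper()  # stand-in for the real agent.run_gradio_chat call

chunks = ["alpha", "beta", "gamma"]
results = [None] * len(chunks)

with ThreadPoolExecutor(max_workers=1) as executor:  # max_workers=1: effectively sequential
    futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
    for fut in as_completed(futures):
        i, value = fut.result()
        results[i] = value  # lands in its original slot regardless of finish order

print(results)  # ['ALPHA', 'BETA', 'GAMMA']

Raising max_workers later would parallelize the chunk analysis without any other change, which is likely why the structure is already in place.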
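The two streaming loops accept three shapes from agent.run_gradio_chat: plain string deltas, objects exposing a .content attribute, and (in the per-chunk path) lists of such objects. TxAgent's actual yield types are not visible in this diff, so the sketch below stubs the generator and shows only the accumulation logic:

# Accumulating a streamed response whose items may be strings, message-like
# objects, or lists of message-like objects. The generator is a stub; the
# real stream would come from TxAgent's run_gradio_chat.
from dataclasses import dataclass

@dataclass
class Msg:
    content: str

def fake_stream():
    yield "Patient shows "                       # plain string delta
    yield Msg("elevated BP; ")                   # object carrying .content
    yield [Msg("recheck "), Msg("in 2 weeks.")]  # list of message objects

response = ""
for res in fake_stream():
    if isinstance(res, str):
        response += res
    elif hasattr(res, "content"):
        response += res.content
    elif isinstance(res, list):
        response += "".join(r.content for r in res if hasattr(r, "content"))

print(response)  # Patient shows elevated BP; recheck in 2 weeks.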
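On the UI side, the handler is now a plain function rather than a generator, so intermediate progress messages no longer stream into the chat; everything appears when update_ui returns. The handler returns one value per output component, using gr.update() to toggle visibility of the download link and the markdown panel, and gr.Chatbot(type="tuples") matches the (role, text) tuples the report builder appends (the tuples format is deprecated in newer Gradio releases in favour of type="messages"). A trimmed sketch of the wiring, with the handler body stubbed:

# Minimal sketch of the click wiring: one return value per output component,
# gr.update() for visibility toggles. The handler body is a stub.
import gradio as gr

def update_ui(file, state):
    messages = state + [("user", "file received"), ("assistant", "analysis done")]
    report_path = file.name if file is not None else None
    return (
        messages,                                                       # chatbot
        gr.update(visible=report_path is not None, value=report_path),  # download file
        messages,                                                       # updated state
        gr.update(visible=True, value="(final report text)"),           # markdown panel
    )

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="tuples")
    report_output_markdown = gr.Markdown(visible=False)
    file_upload = gr.File(file_types=[".xlsx"])
    analyze_btn = gr.Button("Analyze")
    report_output = gr.File(visible=False)
    chatbot_state = gr.State(value=[])
    analyze_btn.click(
        fn=update_ui,
        inputs=[file_upload, chatbot_state],
        outputs=[chatbot, report_output, chatbot_state, report_output_markdown],
    )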