Ali2206 committed on
Commit 7771dd9 · verified · 1 Parent(s): 8b1bbeb

Update app.py

Files changed (1)
  1. app.py +49 -97
app.py CHANGED
@@ -3,12 +3,9 @@ import os
 import pandas as pd
 import json
 import gradio as gr
-from typing import List, Tuple, Union, Generator, BinaryIO
-import hashlib
-import shutil
+from typing import List, Tuple, Union, Generator, BinaryIO, Dict, Any
 import re
 from datetime import datetime
-from concurrent.futures import ThreadPoolExecutor, as_completed
 
 # Setup directories
 persistent_dir = "/data/hf_cache"
@@ -42,22 +39,20 @@ def clean_response(text: str) -> str:
 def estimate_tokens(text: str) -> int:
     return len(text) // 3.5 + 1
 
-def extract_text_from_excel(file_obj) -> str:
-    """Handle both Gradio file objects and direct file paths"""
+def extract_text_from_excel(file_obj: Union[str, Dict[str, Any]]) -> str:
+    """Handle Gradio file upload object which is a dictionary with 'name' and other keys"""
     all_text = []
     try:
-        # Handle Gradio file object
-        if hasattr(file_obj, 'name'):
-            file_path = file_obj.name
-        # Handle direct file path
-        elif isinstance(file_obj, (str, os.PathLike)):
+        # Handle Gradio file upload object
+        if isinstance(file_obj, dict) and 'name' in file_obj:
+            file_path = file_obj['name']
+        elif isinstance(file_obj, str):
             file_path = file_obj
         else:
             raise ValueError("Unsupported file input type")
 
-        # Verify file exists
         if not os.path.exists(file_path):
-            raise FileNotFoundError(f"File not found at path: {file_path}")
+            raise FileNotFoundError(f"Temporary upload file not found at: {file_path}")
 
         xls = pd.ExcelFile(file_path)
 
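Note: two things stand out in this hunk. First, `estimate_tokens` (unchanged context above) floor-divides by a float, so it actually returns a `float` despite its `-> int` annotation. Second, the rewritten `extract_text_from_excel` accepts only a `dict` or `str`, dropping support for the tempfile-like upload objects (exposing `.name`) that other Gradio versions pass to callbacks. A defensive sketch covering both; `resolve_upload_path` is a hypothetical helper, not part of the commit:

```python
from typing import Any

def estimate_tokens(text: str) -> int:
    # len(text) // 3.5 floor-divides by a float and yields a float;
    # int() makes the annotation truthful.
    return int(len(text) / 3.5) + 1

def resolve_upload_path(file_obj: Any) -> str:
    """Normalize a Gradio upload value to a filesystem path (assumption: the
    value is a path string, a dict payload, or a tempfile-like object)."""
    if isinstance(file_obj, str):
        return file_obj
    if isinstance(file_obj, dict) and "name" in file_obj:
        return file_obj["name"]
    if hasattr(file_obj, "name"):  # tempfile wrapper used by several Gradio versions
        return file_obj.name
    raise ValueError(f"Unsupported file input type: {type(file_obj)!r}")
```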
 
@@ -76,44 +71,41 @@ def extract_text_from_excel(file_obj) -> str:
     except Exception as e:
         raise ValueError(f"❌ Error processing Excel file: {str(e)}")
 
-def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
-    effective_max = max_tokens - PROMPT_OVERHEAD
+def split_text_into_chunks(text: str) -> List[str]:
+    effective_max = MAX_CHUNK_TOKENS - PROMPT_OVERHEAD
     lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
     for line in lines:
         t = estimate_tokens(line)
         if curr_tokens + t > effective_max:
             if curr_chunk:
                 chunks.append("\n".join(curr_chunk))
-            if len(chunks) >= max_chunks:
-                break
             curr_chunk, curr_tokens = [line], t
         else:
             curr_chunk.append(line)
             curr_tokens += t
-    if curr_chunk and len(chunks) < max_chunks:
+    if curr_chunk:
         chunks.append("\n".join(curr_chunk))
     return chunks
 
 def build_prompt_from_text(chunk: str) -> str:
     return f"""
-### Unstructured Clinical Records
+### Clinical Records Analysis
 
-Analyze the following clinical notes and provide a detailed, concise summary focusing on:
-- Diagnostic Patterns
-- Medication Issues
-- Missed Opportunities
-- Inconsistencies
-- Follow-up Recommendations
+Please analyze these clinical notes and provide:
+- Key diagnostic indicators
+- Current medications and potential issues
+- Recommended follow-up actions
+- Any inconsistencies or concerns
 
 ---
 
 {chunk}
 
 ---
-Respond in well-structured bullet points with medical reasoning.
+Provide a structured response with clear medical reasoning.
 """
 
-def init_agent():
+def init_agent() -> TxAgent:
     tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(tool_path):
         default_tool = {
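
Note: the new `split_text_into_chunks` drops the old `max_chunks=30` cap, so a very large workbook now produces an unbounded number of chunks (and model calls). The splitter also never subdivides a single line, so one line longer than the budget still becomes an over-budget chunk on its own. A quick sanity check of the budget invariant, assuming the `MAX_CHUNK_TOKENS` and `PROMPT_OVERHEAD` constants defined earlier in app.py:

```python
# Holds as long as no single input line exceeds the budget by itself.
budget = MAX_CHUNK_TOKENS - PROMPT_OVERHEAD
sample = "\n".join(f"row {i}: bp 120/80, hb 13.2" for i in range(50_000))
for c in split_text_into_chunks(sample):
    assert sum(estimate_tokens(line) for line in c.split("\n")) <= budget
```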
@@ -137,7 +129,7 @@ def init_agent():
     agent.init_model()
     return agent
 
-def stream_report(agent, input_file, full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
+def stream_report(agent: TxAgent, input_file: Union[str, Dict[str, Any]], full_output: str) -> Generator[Tuple[str, Union[str, None], str], None, None]:
     accumulated_text = ""
     try:
         if input_file is None:
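
Note: the annotations added here and in the previous hunk (`init_agent() -> TxAgent`, `agent: TxAgent`) reference `TxAgent`, whose import is not visible in these hunks; it is presumably already imported near the top of app.py. If the name were needed only for type checking, a guarded import would avoid any runtime cost (sketch; `from txagent import TxAgent` is an assumed import path):

```python
from __future__ import annotations  # defer annotation evaluation to strings

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only for static analysis; never executed at runtime.
    from txagent import TxAgent
```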
@@ -146,12 +138,11 @@ def stream_report(agent, input_file, full_output: str) -> Generator[Tuple[str, U
 
         try:
             text = extract_text_from_excel(input_file)
+            chunks = split_text_into_chunks(text)
         except Exception as e:
             yield f"❌ {str(e)}", None, ""
             return
 
-        chunks = split_text_into_chunks(text)
-
         for i, chunk in enumerate(chunks):
             prompt = build_prompt_from_text(chunk)
             partial = ""
@@ -160,87 +151,50 @@ def stream_report(agent, input_file, full_output: str) -> Generator[Tuple[str, U
                 max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
                 call_agent=False, conversation=[]
             ):
-                if isinstance(res, str):
-                    partial += res
-                elif hasattr(res, "content"):
-                    partial += res.content
+                partial += res if isinstance(res, str) else res.content
+
             cleaned = clean_response(partial)
-            accumulated_text += f"\n\n📄 **Chunk {i+1}**:\n{cleaned}"
+            accumulated_text += f"\n\n📄 Analysis Part {i+1}:\n{cleaned}"
             yield accumulated_text, None, ""
 
-        summary_prompt = f"Summarize this analysis in a final structured report:\n\n" + accumulated_text
+        summary_prompt = f"Please summarize this analysis:\n\n{accumulated_text}"
         final_report = ""
         for res in agent.run_gradio_chat(
             message=summary_prompt, history=[], temperature=0.2,
             max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
             call_agent=False, conversation=[]
         ):
-            if isinstance(res, str):
-                final_report += res
-            elif hasattr(res, "content"):
-                final_report += res.content
+            final_report += res if isinstance(res, str) else res.content
 
         cleaned = clean_response(final_report)
-        accumulated_text += f"\n\n📊 **Final Summary**:\n{cleaned}"
         report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
         with open(report_path, 'w') as f:
-            f.write(f"# 🧠 Final Patient Report\n\n{cleaned}")
+            f.write(f"# Clinical Analysis Report\n\n{cleaned}")
 
-        yield accumulated_text, report_path, cleaned
+        yield f"{accumulated_text}\n\n📊 Final Summary:\n{cleaned}", report_path, cleaned
 
     except Exception as e:
-        yield f"❌ Unexpected error: {str(e)}", None, ""
+        yield f"❌ Processing error: {str(e)}", None, ""
 
-def create_ui(agent):
-    with gr.Blocks(css="""
-    body {
-        background: #10141f;
-        color: #ffffff;
-        font-family: 'Inter', sans-serif;
-        margin: 0;
-        padding: 0;
-    }
-    .gradio-container {
-        padding: 30px;
-        width: 100vw;
-        max-width: 100%;
-        border-radius: 0;
-        background-color: #1a1f2e;
-    }
-    .output-markdown {
-        background-color: #131720;
-        border-radius: 12px;
-        padding: 20px;
-        min-height: 600px;
-        overflow-y: auto;
-        border: 1px solid #2c3344;
-    }
-    .gr-button {
-        background: linear-gradient(135deg, #4b4ced, #37b6e9);
-        color: white;
-        font-weight: 500;
-        border: none;
-        padding: 10px 20px;
-        border-radius: 8px;
-        transition: background 0.3s ease;
-    }
-    .gr-button:hover {
-        background: linear-gradient(135deg, #37b6e9, #4b4ced);
-    }
-    """) as demo:
-        gr.Markdown("""# 🧠 Clinical Reasoning Assistant
-Upload clinical Excel records below and click **Analyze** to generate a medical summary.
-""")
-        file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
-        analyze_btn = gr.Button("Analyze")
-        report_output_markdown = gr.Markdown(elem_classes="output-markdown")
-        report_file = gr.File(label="Download Report", visible=False)
-        full_output = gr.State(value="")
+def create_ui(agent: TxAgent) -> gr.Blocks:
+    with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 900px !important}") as demo:
+        gr.Markdown("""# Clinical Records Analyzer""")
+        with gr.Row():
+            file_upload = gr.File(label="Upload Excel File", file_types=[".xlsx"])
+            analyze_btn = gr.Button("Analyze", variant="primary")
+
+        with gr.Row():
+            with gr.Column(scale=2):
+                report_output = gr.Markdown()
+            with gr.Column(scale=1):
+                report_file = gr.File(label="Download Report", visible=False)
+
+        full_output = gr.State()
 
         analyze_btn.click(
             fn=stream_report,
             inputs=[file_upload, full_output],
-            outputs=[report_output_markdown, report_file, full_output]
+            outputs=[report_output, report_file, full_output]
        )
 
    return demo
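
Note: `stream_report` is a generator wired directly into `analyze_btn.click`, and many Gradio releases only stream intermediate yields when the queue is enabled; without it, some versions render only the final value. Also, `report_file` is created with `visible=False` and the callback only ever supplies a value, so the download component may never appear. A sketch of both adjustments (Gradio 3.x-style `gr.update`; an assumption, not part of this commit):

```python
import gradio as gr

demo = create_ui(agent)
demo.queue()  # enable queuing so generator yields stream to the browser
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)

# Inside stream_report, reveal the download widget once the report exists:
# yield final_text, gr.update(value=report_path, visible=True), cleaned
```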
@@ -250,12 +204,10 @@ if __name__ == "__main__":
         agent = init_agent()
         demo = create_ui(agent)
         demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            allowed_paths=["/data/hf_cache/reports"],
-            share=True,
-            show_error=True
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False
         )
     except Exception as e:
-        print(f"Error: {str(e)}", file=sys.stderr)
+        print(f"Application error: {str(e)}", file=sys.stderr)
         sys.exit(1)
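
Note: this hunk drops `allowed_paths=["/data/hf_cache/reports"]` while reports are still written under that directory. Gradio versions that restrict file serving to their own temp/static directories will then refuse to serve the generated `.md` file to the Download component. If downloads fail, re-adding the flag is the likely fix (sketch; `report_dir` is the variable from the setup section of app.py):

```python
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    allowed_paths=[report_dir],  # permit serving generated reports to the UI
)
```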
 