Ali2206 committed on
Commit
a42578c
·
verified ·
1 Parent(s): 83aa052

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -33,26 +33,30 @@ MAX_CHUNK_TOKENS = 8192
33
  MAX_NEW_TOKENS = 2048
34
  PROMPT_OVERHEAD = 500
35
 
 
36
  def clean_response(text: str) -> str:
37
  text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
38
  text = re.sub(r"\n{3,}", "\n\n", text)
39
  text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
40
  return text.strip()
41
 
 
42
  def estimate_tokens(text: str) -> int:
43
  return len(text) // 3.5 + 1
44
 
 
45
  def extract_text_from_excel(file_path: str) -> str:
46
  all_text = []
47
  xls = pd.ExcelFile(file_path)
48
  for sheet_name in xls.sheet_names:
49
  df = xls.parse(sheet_name).astype(str).fillna("")
50
- rows = df.apply(lambda row: " | ".join(row), axis=1)
51
- sheet_text = [f"[{sheet_name}] {line}" for line in rows]
52
  all_text.extend(sheet_text)
53
  return "\n".join(all_text)
54
 
55
- def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
 
56
  effective_max = max_tokens - PROMPT_OVERHEAD
57
  lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
58
  for line in lines:
@@ -60,14 +64,17 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> Lis
60
  if curr_tokens + t > effective_max:
61
  if curr_chunk:
62
  chunks.append("\n".join(curr_chunk))
 
 
63
  curr_chunk, curr_tokens = [line], t
64
  else:
65
  curr_chunk.append(line)
66
  curr_tokens += t
67
- if curr_chunk:
68
  chunks.append("\n".join(curr_chunk))
69
  return chunks
70
 
 
71
  def build_prompt_from_text(chunk: str) -> str:
72
  return f"""
73
  ### Unstructured Clinical Records
@@ -87,6 +94,7 @@ Analyze the following clinical notes and provide a detailed, concise summary foc
87
  Respond in well-structured bullet points with medical reasoning.
88
  """
89
 
 
90
  def init_agent():
91
  tool_path = os.path.join(tool_cache_dir, "new_tool.json")
92
  if not os.path.exists(tool_path):
@@ -103,6 +111,7 @@ def init_agent():
103
  agent.init_model()
104
  return agent
105
 
 
106
  def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
107
  messages = chatbot_state if chatbot_state else []
108
  if file is None or not hasattr(file, "name"):
@@ -116,7 +125,11 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tu
116
  def analyze_chunk(i, chunk):
117
  prompt = build_prompt_from_text(chunk)
118
  response = ""
119
- for res in agent.run_gradio_chat(message=prompt, history=[], temperature=0.2, max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS, call_agent=False, conversation=[]):
 
 
 
 
120
  if isinstance(res, str):
121
  response += res
122
  elif hasattr(res, "content"):
@@ -127,7 +140,7 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tu
127
  response += r.content
128
  return i, clean_response(response)
129
 
130
- with ThreadPoolExecutor(max_workers=1) as executor:
131
  futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
132
  for f in as_completed(futures):
133
  i, result = f.result()
@@ -141,7 +154,11 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tu
141
  messages.append({"role": "assistant", "content": "📊 Generating final report..."})
142
 
143
  final_report = ""
144
- for res in agent.run_gradio_chat(message=summary_prompt, history=[], temperature=0.2, max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS, call_agent=False, conversation=[]):
 
 
 
 
145
  if isinstance(res, str):
146
  final_report += res
147
  elif hasattr(res, "content"):
@@ -156,20 +173,22 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tu
156
  messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
157
  return messages, report_path
158
 
 
159
  def create_ui(agent):
160
  with gr.Blocks(css="""
161
  body {
162
  background: #10141f;
163
  color: #ffffff;
164
  font-family: 'Inter', sans-serif;
 
 
165
  }
166
  .gradio-container {
167
  padding: 30px;
168
- max-width: 900px;
169
- margin: auto;
170
- border-radius: 16px;
171
  background-color: #1a1f2e;
172
- box-shadow: 0 0 20px rgba(0, 0, 0, 0.5);
173
  }
174
  .chatbot {
175
  background-color: #131720;
@@ -203,7 +222,8 @@ Upload clinical Excel records below and click **Analyze** to generate a medical
203
 
204
  def update_ui(file, current_state):
205
  messages, report_path = process_final_report(agent, file, current_state)
206
- return messages, gr.update(visible=report_path is not None, value=report_path), messages
 
207
 
208
  analyze_btn.click(
209
  fn=update_ui,
@@ -213,6 +233,7 @@ Upload clinical Excel records below and click **Analyze** to generate a medical
213
 
214
  return demo
215
 
 
216
  if __name__ == "__main__":
217
  try:
218
  agent = init_agent()
 
33
  MAX_NEW_TOKENS = 2048
34
  PROMPT_OVERHEAD = 500
35
 
36
+
37
  def clean_response(text: str) -> str:
38
  text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
39
  text = re.sub(r"\n{3,}", "\n\n", text)
40
  text = re.sub(r"[^\n#\-\*\w\s\.,:\(\)]+", "", text)
41
  return text.strip()
42
 
43
+
44
  def estimate_tokens(text: str) -> int:
45
  return len(text) // 3.5 + 1
46
 
47
+
48
  def extract_text_from_excel(file_path: str) -> str:
49
  all_text = []
50
  xls = pd.ExcelFile(file_path)
51
  for sheet_name in xls.sheet_names:
52
  df = xls.parse(sheet_name).astype(str).fillna("")
53
+ rows = df.apply(lambda row: " | ".join([cell for cell in row if cell.strip()]), axis=1)
54
+ sheet_text = [f"[{sheet_name}] {line}" for line in rows if line.strip()]
55
  all_text.extend(sheet_text)
56
  return "\n".join(all_text)
57
 
58
+
59
+ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS, max_chunks: int = 30) -> List[str]:
60
  effective_max = max_tokens - PROMPT_OVERHEAD
61
  lines, chunks, curr_chunk, curr_tokens = text.split("\n"), [], [], 0
62
  for line in lines:
 
64
  if curr_tokens + t > effective_max:
65
  if curr_chunk:
66
  chunks.append("\n".join(curr_chunk))
67
+ if len(chunks) >= max_chunks:
68
+ break
69
  curr_chunk, curr_tokens = [line], t
70
  else:
71
  curr_chunk.append(line)
72
  curr_tokens += t
73
+ if curr_chunk and len(chunks) < max_chunks:
74
  chunks.append("\n".join(curr_chunk))
75
  return chunks
76
 
77
+
78
  def build_prompt_from_text(chunk: str) -> str:
79
  return f"""
80
  ### Unstructured Clinical Records
 
94
  Respond in well-structured bullet points with medical reasoning.
95
  """
96
 
97
+
98
  def init_agent():
99
  tool_path = os.path.join(tool_cache_dir, "new_tool.json")
100
  if not os.path.exists(tool_path):
 
111
  agent.init_model()
112
  return agent
113
 
114
+
115
  def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
116
  messages = chatbot_state if chatbot_state else []
117
  if file is None or not hasattr(file, "name"):
 
125
  def analyze_chunk(i, chunk):
126
  prompt = build_prompt_from_text(chunk)
127
  response = ""
128
+ for res in agent.run_gradio_chat(
129
+ message=prompt, history=[], temperature=0.2,
130
+ max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
131
+ call_agent=False, conversation=[]
132
+ ):
133
  if isinstance(res, str):
134
  response += res
135
  elif hasattr(res, "content"):
 
140
  response += r.content
141
  return i, clean_response(response)
142
 
143
+ with ThreadPoolExecutor(max_workers=4) as executor:
144
  futures = [executor.submit(analyze_chunk, i, c) for i, c in enumerate(chunks)]
145
  for f in as_completed(futures):
146
  i, result = f.result()
 
154
  messages.append({"role": "assistant", "content": "📊 Generating final report..."})
155
 
156
  final_report = ""
157
+ for res in agent.run_gradio_chat(
158
+ message=summary_prompt, history=[], temperature=0.2,
159
+ max_new_tokens=MAX_NEW_TOKENS, max_token=MAX_MODEL_TOKENS,
160
+ call_agent=False, conversation=[]
161
+ ):
162
  if isinstance(res, str):
163
  final_report += res
164
  elif hasattr(res, "content"):
 
173
  messages.append({"role": "assistant", "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
174
  return messages, report_path
175
 
176
+
177
  def create_ui(agent):
178
  with gr.Blocks(css="""
179
  body {
180
  background: #10141f;
181
  color: #ffffff;
182
  font-family: 'Inter', sans-serif;
183
+ margin: 0;
184
+ padding: 0;
185
  }
186
  .gradio-container {
187
  padding: 30px;
188
+ width: 100vw;
189
+ max-width: 100%;
190
+ border-radius: 0;
191
  background-color: #1a1f2e;
 
192
  }
193
  .chatbot {
194
  background-color: #131720;
 
222
 
223
  def update_ui(file, current_state):
224
  messages, report_path = process_final_report(agent, file, current_state)
225
+ chat_format = [(msg["role"], msg["content"]) for msg in messages if isinstance(msg, dict)]
226
+ return chat_format, gr.update(visible=report_path is not None, value=report_path), messages
227
 
228
  analyze_btn.click(
229
  fn=update_ui,
 
233
 
234
  return demo
235
 
236
+
237
  if __name__ == "__main__":
238
  try:
239
  agent = init_agent()