Ali2206 committed (verified) · Commit fb0ec4e · Parent: 1f0c81e

Update ui/ui_core.py

Files changed (1): ui/ui_core.py (+31 -59)
ui/ui_core.py CHANGED
@@ -3,8 +3,7 @@ import os
 import pandas as pd
 import pdfplumber
 import gradio as gr
-import re
-from typing import List, Dict, Optional
+from typing import List
 
 # ✅ Fix: Add src to Python path
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
@@ -12,43 +11,12 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
 from txagent.txagent import TxAgent
 
 def sanitize_utf8(text: str) -> str:
-    """Clean text of problematic Unicode characters"""
-    return text.encode('utf-8', 'ignore').decode('utf-8')
-
-def clean_final_response(response: str) -> str:
-    """Remove tool calls and other artifacts from final response"""
-    # Split on TOOL_CALLS if present
-    if '[TOOL_CALLS]' in response:
-        response = response.split('[TOOL_CALLS]')[0]
-    # Remove any remaining special tokens
-    response = re.sub(r'\[[A-Z_]+\]', '', response)
-    return response.strip()
-
-def chunk_text(text: str, max_tokens: int = 8000) -> List[str]:
-    """Split text into chunks based on token count estimate"""
-    words = text.split()
-    chunks = []
-    current_chunk = []
-    current_tokens = 0
-
-    for word in words:
-        # Estimate tokens (roughly 1 token per 4 characters)
-        word_tokens = len(word) // 4 + 1
-        if current_tokens + word_tokens > max_tokens and current_chunk:
-            chunks.append(' '.join(current_chunk))
-            current_chunk = [word]
-            current_tokens = word_tokens
-        else:
-            current_chunk.append(word)
-            current_tokens += word_tokens
-
-    if current_chunk:
-        chunks.append(' '.join(current_chunk))
-
-    return chunks
+    return text.encode("utf-8", "ignore").decode("utf-8")
+
+def clean_final_response(text: str) -> str:
+    return text.replace("[TOOL_CALLS]", "").strip()
 
 def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
-    """Extract text from spreadsheet files with error handling"""
     try:
         if not os.path.exists(file_path):
             return f"File not found: {file_path}"
@@ -68,13 +36,12 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
                 line = " | ".join(str(cell) for cell in row if pd.notna(cell))
                 if line:
                     lines.append(line)
-        return f"📄 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
+        return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
 
     except Exception as e:
         return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
 
 def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
-    """Extract text from PDF files with error handling"""
     try:
         if not os.path.exists(file_path):
             return f"PDF not found: {file_path}"
@@ -87,31 +54,42 @@ def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -
                     text = page.extract_text() or ""
                     extracted.append(text.strip())
                     if progress:
-                        progress((index + (i / num_pages)) / total,
-                                 desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
+                        progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
                 except Exception as e:
                     extracted.append(f"[Error reading page {i+1}]: {str(e)}")
-        return f"📄 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
+        return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
 
     except Exception as e:
         return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
 
+def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
+    chunks = []
+    words = text.split()
+    chunk = []
+    token_count = 0
+    for word in words:
+        token_count += len(word) // 4 + 1
+        if token_count > max_tokens:
+            chunks.append(" ".join(chunk))
+            chunk = [word]
+            token_count = len(word) // 4 + 1
+        else:
+            chunk.append(word)
+    if chunk:
+        chunks.append(" ".join(chunk))
+    return chunks
+
 def create_ui(agent: TxAgent):
-    with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Patient Support System") as demo:
-        gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
-
-        # Fix: Changed type to 'messages' to match Gradio requirements
-        chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
-
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("<h1 style='text-align: center;'>\U0001F4CB CPS: Clinical Patient Support System</h1>")
+
+        chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="tuples")
         file_upload = gr.File(
             label="Upload Medical File",
             file_types=[".pdf", ".txt", ".docx", ".jpg", ".png", ".csv", ".xls", ".xlsx"],
            file_count="multiple"
        )
-        message_input = gr.Textbox(
-            placeholder="Ask a biomedical question or just upload the files...",
-            show_label=False
-        )
+        message_input = gr.Textbox(placeholder="Ask a biomedical question or just upload the files...", show_label=False)
        send_button = gr.Button("Send", variant="primary")
        conversation_state = gr.State([])
 
@@ -126,7 +104,6 @@ def create_ui(agent: TxAgent):
             )
 
             try:
-                # Show processing message immediately
                 history.append((message, "⏳ Processing your request..."))
                 yield history
 
@@ -169,23 +146,18 @@ def create_ui(agent: TxAgent):
                     max_round=30
                 )
 
-                # Collect all updates from the generator
                 chunk_response = ""
                 for update in generator:
                     if isinstance(update, str):
                         chunk_response += update
                     elif isinstance(update, list):
-                        # Handle list of messages
                         for msg in update:
                             if hasattr(msg, 'content'):
                                 chunk_response += msg.content
 
                 full_response += chunk_response + "\n\n"
 
-                # Clean up the final response
                 full_response = clean_final_response(full_response.strip())
-
-                # Remove the processing message and add the final response
                 history[-1] = (message, full_response)
                 yield history
 
@@ -208,4 +180,4 @@ def create_ui(agent: TxAgent):
             ["Is there anything abnormal in the attached blood work report?"]
         ], inputs=message_input)
 
-    return demo
+    return demo
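For reference, a minimal usage sketch of the two helpers this commit rewrites, chunk_text and clean_final_response. It is not part of the commit; the import path assumes the repository root is on sys.path so that ui/ui_core.py can be imported as ui.ui_core.

# Minimal sketch, not part of the commit. Assumes the repo root is on
# sys.path so ui/ui_core.py (and its txagent dependency) can be imported.
from ui.ui_core import chunk_text, clean_final_response

# clean_final_response now only strips the literal "[TOOL_CALLS]" marker and
# surrounding whitespace; the regex-based token removal was dropped.
raw = "Based on the labs, the values look within normal range. [TOOL_CALLS]"
print(clean_final_response(raw))  # -> "Based on the labs, the values look within normal range."

# chunk_text estimates roughly 4 characters per token and starts a new chunk
# once the running estimate exceeds max_tokens (default now 8192, was 8000).
long_text = "word " * 50_000
chunks = chunk_text(long_text, max_tokens=8192)
print(len(chunks), max(len(c) for c in chunks))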