Ali2206 commited on
Commit
1f0c81e
·
verified ·
1 Parent(s): 0f864c0

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +58 -30
ui/ui_core.py CHANGED
@@ -3,7 +3,8 @@ import os
3
  import pandas as pd
4
  import pdfplumber
5
  import gradio as gr
6
- from typing import List
 
7
 
8
  # ✅ Fix: Add src to Python path
9
  sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
@@ -11,12 +12,43 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
11
  from txagent.txagent import TxAgent
12
 
13
  def sanitize_utf8(text: str) -> str:
14
- return text.encode("utf-8", "ignore").decode("utf-8")
15
-
16
- def clean_final_response(text: str) -> str:
17
- return text.replace("[TOOL_CALLS]", "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
 
20
  try:
21
  if not os.path.exists(file_path):
22
  return f"File not found: {file_path}"
@@ -36,12 +68,13 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
36
  line = " | ".join(str(cell) for cell in row if pd.notna(cell))
37
  if line:
38
  lines.append(line)
39
- return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
40
 
41
  except Exception as e:
42
  return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
43
 
44
  def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
 
45
  try:
46
  if not os.path.exists(file_path):
47
  return f"PDF not found: {file_path}"
@@ -54,42 +87,31 @@ def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -
54
  text = page.extract_text() or ""
55
  extracted.append(text.strip())
56
  if progress:
57
- progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
 
58
  except Exception as e:
59
  extracted.append(f"[Error reading page {i+1}]: {str(e)}")
60
- return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
61
 
62
  except Exception as e:
63
  return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
64
 
65
- def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
66
- chunks = []
67
- words = text.split()
68
- chunk = []
69
- token_count = 0
70
- for word in words:
71
- token_count += len(word) // 4 + 1
72
- if token_count > max_tokens:
73
- chunks.append(" ".join(chunk))
74
- chunk = [word]
75
- token_count = len(word) // 4 + 1
76
- else:
77
- chunk.append(word)
78
- if chunk:
79
- chunks.append(" ".join(chunk))
80
- return chunks
81
-
82
  def create_ui(agent: TxAgent):
83
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
84
- gr.Markdown("<h1 style='text-align: center;'>\U0001F4CB CPS: Clinical Patient Support System</h1>")
85
-
86
- chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="text")
 
 
87
  file_upload = gr.File(
88
  label="Upload Medical File",
89
  file_types=[".pdf", ".txt", ".docx", ".jpg", ".png", ".csv", ".xls", ".xlsx"],
90
  file_count="multiple"
91
  )
92
- message_input = gr.Textbox(placeholder="Ask a biomedical question or just upload the files...", show_label=False)
 
 
 
93
  send_button = gr.Button("Send", variant="primary")
94
  conversation_state = gr.State([])
95
 
@@ -104,6 +126,7 @@ def create_ui(agent: TxAgent):
104
  )
105
 
106
  try:
 
107
  history.append((message, "⏳ Processing your request..."))
108
  yield history
109
 
@@ -146,18 +169,23 @@ def create_ui(agent: TxAgent):
146
  max_round=30
147
  )
148
 
 
149
  chunk_response = ""
150
  for update in generator:
151
  if isinstance(update, str):
152
  chunk_response += update
153
  elif isinstance(update, list):
 
154
  for msg in update:
155
  if hasattr(msg, 'content'):
156
  chunk_response += msg.content
157
 
158
  full_response += chunk_response + "\n\n"
159
 
 
160
  full_response = clean_final_response(full_response.strip())
 
 
161
  history[-1] = (message, full_response)
162
  yield history
163
 
 
3
  import pandas as pd
4
  import pdfplumber
5
  import gradio as gr
6
+ import re
7
+ from typing import List, Dict, Optional
8
 
9
  # ✅ Fix: Add src to Python path
10
  sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
 
12
  from txagent.txagent import TxAgent
13
 
14
  def sanitize_utf8(text: str) -> str:
15
+ """Clean text of problematic Unicode characters"""
16
+ return text.encode('utf-8', 'ignore').decode('utf-8')
17
+
18
+ def clean_final_response(response: str) -> str:
19
+ """Remove tool calls and other artifacts from final response"""
20
+ # Split on TOOL_CALLS if present
21
+ if '[TOOL_CALLS]' in response:
22
+ response = response.split('[TOOL_CALLS]')[0]
23
+ # Remove any remaining special tokens
24
+ response = re.sub(r'\[[A-Z_]+\]', '', response)
25
+ return response.strip()
26
+
27
+ def chunk_text(text: str, max_tokens: int = 8000) -> List[str]:
28
+ """Split text into chunks based on token count estimate"""
29
+ words = text.split()
30
+ chunks = []
31
+ current_chunk = []
32
+ current_tokens = 0
33
+
34
+ for word in words:
35
+ # Estimate tokens (roughly 1 token per 4 characters)
36
+ word_tokens = len(word) // 4 + 1
37
+ if current_tokens + word_tokens > max_tokens and current_chunk:
38
+ chunks.append(' '.join(current_chunk))
39
+ current_chunk = [word]
40
+ current_tokens = word_tokens
41
+ else:
42
+ current_chunk.append(word)
43
+ current_tokens += word_tokens
44
+
45
+ if current_chunk:
46
+ chunks.append(' '.join(current_chunk))
47
+
48
+ return chunks
49
 
50
  def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
51
+ """Extract text from spreadsheet files with error handling"""
52
  try:
53
  if not os.path.exists(file_path):
54
  return f"File not found: {file_path}"
 
68
  line = " | ".join(str(cell) for cell in row if pd.notna(cell))
69
  if line:
70
  lines.append(line)
71
+ return f"📄 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
72
 
73
  except Exception as e:
74
  return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
75
 
76
  def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
77
+ """Extract text from PDF files with error handling"""
78
  try:
79
  if not os.path.exists(file_path):
80
  return f"PDF not found: {file_path}"
 
87
  text = page.extract_text() or ""
88
  extracted.append(text.strip())
89
  if progress:
90
+ progress((index + (i / num_pages)) / total,
91
+ desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
92
  except Exception as e:
93
  extracted.append(f"[Error reading page {i+1}]: {str(e)}")
94
+ return f"📄 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
95
 
96
  except Exception as e:
97
  return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def create_ui(agent: TxAgent):
100
+ with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Patient Support System") as demo:
101
+ gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
102
+
103
+ # Fix: Changed type to 'messages' to match Gradio requirements
104
+ chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
105
+
106
  file_upload = gr.File(
107
  label="Upload Medical File",
108
  file_types=[".pdf", ".txt", ".docx", ".jpg", ".png", ".csv", ".xls", ".xlsx"],
109
  file_count="multiple"
110
  )
111
+ message_input = gr.Textbox(
112
+ placeholder="Ask a biomedical question or just upload the files...",
113
+ show_label=False
114
+ )
115
  send_button = gr.Button("Send", variant="primary")
116
  conversation_state = gr.State([])
117
 
 
126
  )
127
 
128
  try:
129
+ # Show processing message immediately
130
  history.append((message, "⏳ Processing your request..."))
131
  yield history
132
 
 
169
  max_round=30
170
  )
171
 
172
+ # Collect all updates from the generator
173
  chunk_response = ""
174
  for update in generator:
175
  if isinstance(update, str):
176
  chunk_response += update
177
  elif isinstance(update, list):
178
+ # Handle list of messages
179
  for msg in update:
180
  if hasattr(msg, 'content'):
181
  chunk_response += msg.content
182
 
183
  full_response += chunk_response + "\n\n"
184
 
185
+ # Clean up the final response
186
  full_response = clean_final_response(full_response.strip())
187
+
188
+ # Remove the processing message and add the final response
189
  history[-1] = (message, full_response)
190
  yield history
191