Ali2206 committed on
Commit 446fbec · verified · 1 Parent(s): 05cb28a

Update ui/ui_core.py

Files changed (1)
  1. ui/ui_core.py +35 -75
ui/ui_core.py CHANGED
@@ -3,35 +3,21 @@ import os
 import pandas as pd
 import pdfplumber
 import gradio as gr
-from tabulate import tabulate
-from typing import List, Optional
+from typing import List
 
-# ✅ Fix: Add src to Python path with correct parentheses
+# ✅ Fix: Add src to Python path
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
 
 from txagent.txagent import TxAgent
 
-def safe_extract_table_data(table: List[List[str]]) -> List[str]:
-    extracted_rows = []
-    if not table or not isinstance(table, list):
-        return extracted_rows
-    for row in table:
-        if not row or not isinstance(row, list):
-            continue
-        try:
-            clean_row = [str(cell) if cell is not None else "" for cell in row]
-            if any(clean_row):
-                extracted_rows.append("\t".join(clean_row))
-        except Exception as e:
-            print(f"Error processing table row: {e}")
-            continue
-    return extracted_rows
-
 def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
     try:
         if not os.path.exists(file_path):
             return f"File not found: {file_path}"
 
+        if progress:
+            progress((index + 1) / total, desc=f"Reading spreadsheet: {os.path.basename(file_path)}")
+
         if file_path.endswith(".csv"):
             df = pd.read_csv(file_path, encoding="utf-8", errors="replace", low_memory=False)
         elif file_path.endswith((".xls", ".xlsx")):
@@ -39,62 +25,40 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
         else:
             return f"Unsupported spreadsheet format: {file_path}"
 
-        if progress:
-            progress((index + 1) / total, desc=f"Processed table: {os.path.basename(file_path)}")
-
-        group_column = None
-        for col in ["Booking Number", "Form Name"]:
-            if col in df.columns:
-                group_column = col
-                break
-
-        if group_column:
-            try:
-                groups = df.groupby(group_column)
-                result = []
-                for group_name, group_df in groups:
-                    if group_name is None:
-                        continue
-                    result.append(f"\n### Group: {group_name}\n")
-                    result.append(tabulate(group_df, headers="keys", tablefmt="github", showindex=False))
-                return "\n".join(result) if result else tabulate(df, headers="keys", tablefmt="github", showindex=False)
-            except Exception as e:
-                print(f"Error during grouping: {e}")
-                return tabulate(df, headers="keys", tablefmt="github", showindex=False)
-        else:
-            return tabulate(df, headers="keys", tablefmt="github", showindex=False)
+        lines = []
+        for _, row in df.iterrows():
+            line = " | ".join(str(cell) for cell in row if pd.notna(cell))
+            if line:
+                lines.append(line)
+        return f"📄 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
 
     except Exception as e:
-        return f"Error parsing file {os.path.basename(file_path)}: {str(e)}"
+        return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
 
 def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
-    extracted = []
     try:
         if not os.path.exists(file_path):
-            return f"PDF file not found: {file_path}"
+            return f"PDF not found: {file_path}"
 
+        extracted = []
         with pdfplumber.open(file_path) as pdf:
-            num_pages = len(pdf.pages) if hasattr(pdf, 'pages') else 0
-            for i, page in enumerate(pdf.pages if num_pages > 0 else []):
+            num_pages = len(pdf.pages)
+            for i, page in enumerate(pdf.pages):
                 try:
-                    tables = page.extract_tables() if hasattr(page, 'extract_tables') else []
-                    for table in tables if tables else []:
-                        extracted.extend(safe_extract_table_data(table))
-
-                    if progress and num_pages > 0:
-                        progress((index + (i / num_pages)) / total,
-                                 desc=f"Parsing PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
-                except Exception as page_error:
-                    print(f"Error processing page {i+1}: {page_error}")
-                    continue
-
-        return "\n".join(extracted) if extracted else f"No extractable content found in {os.path.basename(file_path)}"
+                    text = page.extract_text() or ""
+                    extracted.append(text.strip())
+                    if progress:
+                        progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
+                except Exception as e:
+                    extracted.append(f"[Error reading page {i+1}]: {str(e)}")
+        return f"📄 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
+
     except Exception as e:
-        return f"Error parsing PDF {os.path.basename(file_path)}: {str(e)}"
+        return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
 
 def create_ui(agent: TxAgent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
+        gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
        chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
 
         file_upload = gr.File(
@@ -123,7 +87,7 @@ def create_ui(agent: TxAgent):
             for index, file in enumerate(uploaded_files):
                 if not hasattr(file, 'name'):
                     continue
-
+
                 path = file.name
                 try:
                     if path.endswith((".csv", ".xls", ".xlsx")):
@@ -132,16 +96,14 @@ def create_ui(agent: TxAgent):
                         extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
                     else:
                         extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
-                        if progress:
-                            progress((index + 1) / total_files, desc=f"Skipping unsupported file: {os.path.basename(path)}")
                 except Exception as file_error:
-                    print(f"Error processing file {path}: {file_error}")
-                    extracted_text += f"\n[Error processing file: {os.path.basename(path)}]\n"
+                    extracted_text += f"[Error processing file: {os.path.basename(path)}] {str(file_error)}\n"
                     continue
 
-            message = f"{context}\n\n---\n{extracted_text.strip()}\n---\n\nBegin your reasoning."
+            message = (
+                f"{context}\n\n--- Uploaded File Content ---\n\n{extracted_text.strip()}\n\n--- End of File ---\n\nNow begin your reasoning:"
+            )
 
-            final_response = None
             generator = agent.run_gradio_chat(
                 message=message,
                 history=history,
@@ -153,7 +115,7 @@ def create_ui(agent: TxAgent):
                 uploaded_files=uploaded_files,
                 max_round=30
             )
-
+
             for update in generator:
                 try:
                     if isinstance(update, list):
@@ -163,15 +125,13 @@ def create_ui(agent: TxAgent):
                            and not (
                                msg.role == "assistant"
                                and hasattr(msg, 'content')
-                                and msg.content.strip().startswith("🧰")
+                                and msg.content.strip().startswith("🧠")
                            )
                        ]
                        if cleaned:
-                            final_response = cleaned
                            yield cleaned
-                        else:
-                            if isinstance(update, str) and not update.strip().startswith("🧰"):
-                                yield update.encode("utf-8", "replace").decode("utf-8")
+                        elif isinstance(update, str) and not update.strip().startswith("🧠"):
+                            yield update.encode("utf-8", "replace").decode("utf-8")
                    except Exception as update_error:
                        print(f"Error processing update: {update_error}")
                        continue
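
For reviewers who want to sanity-check the new spreadsheet handling outside the app: this commit replaces the old tabulate()-based GitHub tables with rows joined by " | ", dropping NaN cells and empty rows. The sketch below mirrors that logic in isolation; the flatten_rows helper and the sample DataFrame are illustrative only and are not part of the repository.

# Illustrative sketch of the new row-flattening behavior (not repo code).
import pandas as pd

def flatten_rows(df: pd.DataFrame, name: str) -> str:
    # Mirror the logic added in extract_all_text_from_csv_or_excel:
    # join non-NaN cells with " | ", skip empty rows, prepend the file name.
    lines = []
    for _, row in df.iterrows():
        line = " | ".join(str(cell) for cell in row if pd.notna(cell))
        if line:
            lines.append(line)
    return f"📄 {name}\n\n" + "\n".join(lines)

# Hypothetical sample data for illustration.
df = pd.DataFrame({"Booking Number": ["B-001", "B-002"], "Form Name": ["Intake", None]})
print(flatten_rows(df, "sample.xlsx"))
# 📄 sample.xlsx
#
# B-001 | Intake
# B-002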