Ali2206 committed on
Commit
a834285
·
verified ·
1 Parent(s): 8c72711

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +20 -58
ui/ui_core.py CHANGED
@@ -2,6 +2,7 @@ import sys
2
  import os
3
  import pandas as pd
4
  import pdfplumber
 
5
  import gradio as gr
6
  from typing import List
7
 
@@ -20,7 +21,6 @@ def clean_final_response(text: str) -> str:
20
  if len(responses) <= 1:
21
  return f"<div style='padding:1em;border:1px solid #ccc;border-radius:12px;color:#fff;background:#353F54;'><p>{cleaned}</p></div>"
22
 
23
- # Support multiple [Final Analysis] sections
24
  panels = []
25
  for i, section in enumerate(responses[1:], 1):
26
  final = section.strip()
@@ -32,59 +32,30 @@ def clean_final_response(text: str) -> str:
32
  )
33
  return "".join(panels)
34
 
35
- def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, total=1) -> str:
36
  try:
37
- if not os.path.exists(file_path):
38
- return f"File not found: {file_path}"
39
-
40
- if progress:
41
- progress((index + 1) / total, desc=f"Reading spreadsheet: {os.path.basename(file_path)}")
42
-
43
- df = None
44
- if file_path.endswith(".csv"):
45
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
46
- elif file_path.endswith((".xls", ".xlsx")):
47
  try:
48
  df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
49
  except:
50
  df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
 
 
 
 
 
 
51
 
52
  if df is None or df.empty:
53
- return f"[Warning] No data extracted from: {file_path}"
54
-
55
- df = df.fillna("") # Handle missing data gracefully
56
-
57
- lines = []
58
- for _, row in df.iterrows():
59
- line = " | ".join(str(cell) for cell in row if str(cell).strip())
60
- if line:
61
- lines.append(line)
62
-
63
- return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
64
 
 
 
 
65
  except Exception as e:
66
- return f"[Error reading {os.path.basename(file_path)}]: {str(e)}"
67
-
68
- def extract_all_text_from_pdf(file_path: str, progress=None, index=0, total=1) -> str:
69
- try:
70
- if not os.path.exists(file_path):
71
- return f"PDF not found: {file_path}"
72
-
73
- extracted = []
74
- with pdfplumber.open(file_path) as pdf:
75
- num_pages = len(pdf.pages)
76
- for i, page in enumerate(pdf.pages):
77
- try:
78
- text = page.extract_text() or ""
79
- extracted.append(text.strip())
80
- if progress:
81
- progress((index + (i / num_pages)) / total, desc=f"Reading PDF: {os.path.basename(file_path)} ({i+1}/{num_pages})")
82
- except Exception as e:
83
- extracted.append(f"[Error reading page {i+1}]: {str(e)}")
84
- return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n\n".join(extracted)
85
-
86
- except Exception as e:
87
- return f"[Error reading PDF {os.path.basename(file_path)}]: {str(e)}"
88
 
89
  def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
90
  chunks = []
@@ -103,8 +74,6 @@ def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
103
  chunks.append(" ".join(chunk))
104
  return chunks
105
 
106
- # ... rest of the UI code remains unchanged
107
-
108
  def create_ui(agent: TxAgent):
109
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
110
  gr.Markdown("<h1 style='text-align: center;'>\U0001F4CB CPS: Clinical Patient Support System</h1>")
@@ -140,18 +109,11 @@ def create_ui(agent: TxAgent):
140
  if not hasattr(file, 'name'):
141
  continue
142
  path = file.name
143
- try:
144
- if path.endswith((".csv", ".xls", ".xlsx")):
145
- extracted_text += extract_all_text_from_csv_or_excel(path, progress, index, total_files) + "\n"
146
- elif path.endswith(".pdf"):
147
- extracted_text += extract_all_text_from_pdf(path, progress, index, total_files) + "\n"
148
- else:
149
- extracted_text += f"(Uploaded file: {os.path.basename(path)})\n"
150
- except Exception as file_error:
151
- extracted_text += f"[Error processing {os.path.basename(path)}]: {str(file_error)}\n"
152
-
153
- sanitized = sanitize_utf8(extracted_text.strip())
154
- chunks = chunk_text(sanitized)
155
 
156
  full_response = ""
157
  for i, chunk in enumerate(chunks):
 
2
  import os
3
  import pandas as pd
4
  import pdfplumber
5
+ import json
6
  import gradio as gr
7
  from typing import List
8
 
 
21
  if len(responses) <= 1:
22
  return f"<div style='padding:1em;border:1px solid #ccc;border-radius:12px;color:#fff;background:#353F54;'><p>{cleaned}</p></div>"
23
 
 
24
  panels = []
25
  for i, section in enumerate(responses[1:], 1):
26
  final = section.strip()
 
32
  )
33
  return "".join(panels)
34
 
35
+ def convert_file_to_json(file_path: str, file_type: str) -> str:
36
  try:
37
+ if file_type == "csv":
 
 
 
 
 
 
 
38
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
39
+ elif file_type in ["xls", "xlsx"]:
40
  try:
41
  df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
42
  except:
43
  df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
44
+ elif file_type == "pdf":
45
+ with pdfplumber.open(file_path) as pdf:
46
+ text = "\n".join([page.extract_text() or "" for page in pdf.pages])
47
+ return json.dumps({"filename": os.path.basename(file_path), "content": text.strip()})
48
+ else:
49
+ return json.dumps({"error": f"Unsupported file type: {file_type}"})
50
 
51
  if df is None or df.empty:
52
+ return json.dumps({"warning": f"No data extracted from: {file_path}"})
 
 
 
 
 
 
 
 
 
 
53
 
54
+ df = df.fillna("")
55
+ content = df.astype(str).values.tolist()
56
+ return json.dumps({"filename": os.path.basename(file_path), "rows": content})
57
  except Exception as e:
58
+ return json.dumps({"error": f"Error reading {os.path.basename(file_path)}: {str(e)}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def chunk_text(text: str, max_tokens: int = 8192) -> List[str]:
61
  chunks = []
 
74
  chunks.append(" ".join(chunk))
75
  return chunks
76
 
 
 
77
  def create_ui(agent: TxAgent):
78
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
79
  gr.Markdown("<h1 style='text-align: center;'>\U0001F4CB CPS: Clinical Patient Support System</h1>")
 
109
  if not hasattr(file, 'name'):
110
  continue
111
  path = file.name
112
+ extension = path.split(".")[-1].lower()
113
+ json_text = convert_file_to_json(path, extension)
114
+ extracted_text += sanitize_utf8(json_text) + "\n"
115
+
116
+ chunks = chunk_text(extracted_text.strip())
 
 
 
 
 
 
 
117
 
118
  full_response = ""
119
  for i, chunk in enumerate(chunks):