Ali2206 committed on
Commit
2737da8
·
verified ·
1 Parent(s): e69f295

Update ui/ui_core.py

Browse files
Files changed (1) hide show
  1. ui/ui_core.py +7 -4
ui/ui_core.py CHANGED
@@ -42,21 +42,24 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
42
 
43
  df = None
44
  if file_path.endswith(".csv"):
45
- df = pd.read_csv(file_path, encoding_errors="replace", dtype=str, header=None)
46
  elif file_path.endswith((".xls", ".xlsx")):
47
  try:
48
- df = pd.read_excel(file_path, engine="openpyxl", dtype=str, header=None)
49
  except:
50
- df = pd.read_excel(file_path, engine="xlrd", dtype=str, header=None)
51
 
52
  if df is None or df.empty:
53
  return f"[Warning] No data extracted from: {file_path}"
54
 
 
 
55
  lines = []
56
  for _, row in df.iterrows():
57
- line = " | ".join(str(cell) for cell in row if pd.notna(cell))
58
  if line:
59
  lines.append(line)
 
60
  return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
61
 
62
  except Exception as e:
 
42
 
43
  df = None
44
  if file_path.endswith(".csv"):
45
+ df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
46
  elif file_path.endswith((".xls", ".xlsx")):
47
  try:
48
+ df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
49
  except:
50
+ df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
51
 
52
  if df is None or df.empty:
53
  return f"[Warning] No data extracted from: {file_path}"
54
 
55
+ df = df.fillna("") # Handle missing data gracefully
56
+
57
  lines = []
58
  for _, row in df.iterrows():
59
+ line = " | ".join(str(cell) for cell in row if str(cell).strip())
60
  if line:
61
  lines.append(line)
62
+
63
  return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
64
 
65
  except Exception as e: