Update ui/ui_core.py
Browse files
- ui/ui_core.py +7 -4
ui/ui_core.py
CHANGED
@@ -42,21 +42,24 @@ def extract_all_text_from_csv_or_excel(file_path: str, progress=None, index=0, t
|
|
42 |
|
43 |
df = None
|
44 |
if file_path.endswith(".csv"):
|
45 |
-
df = pd.read_csv(file_path, encoding_errors="replace", dtype=str,
|
46 |
elif file_path.endswith((".xls", ".xlsx")):
|
47 |
try:
|
48 |
-
df = pd.read_excel(file_path, engine="openpyxl",
|
49 |
except:
|
50 |
-
df = pd.read_excel(file_path, engine="xlrd",
|
51 |
|
52 |
if df is None or df.empty:
|
53 |
return f"[Warning] No data extracted from: {file_path}"
|
54 |
|
|
|
|
|
55 |
lines = []
|
56 |
for _, row in df.iterrows():
|
57 |
-
line = " | ".join(str(cell) for cell in row if
|
58 |
if line:
|
59 |
lines.append(line)
|
|
|
60 |
return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
|
61 |
|
62 |
except Exception as e:
|
|
|
42 |
|
43 |
df = None
|
44 |
if file_path.endswith(".csv"):
|
45 |
+
df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
|
46 |
elif file_path.endswith((".xls", ".xlsx")):
|
47 |
try:
|
48 |
+
df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
|
49 |
except:
|
50 |
+
df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
|
51 |
|
52 |
if df is None or df.empty:
|
53 |
return f"[Warning] No data extracted from: {file_path}"
|
54 |
|
55 |
+
df = df.fillna("") # Handle missing data gracefully
|
56 |
+
|
57 |
lines = []
|
58 |
for _, row in df.iterrows():
|
59 |
+
line = " | ".join(str(cell) for cell in row if str(cell).strip())
|
60 |
if line:
|
61 |
lines.append(line)
|
62 |
+
|
63 |
return f"\U0001F4C4 {os.path.basename(file_path)}\n\n" + "\n".join(lines)
|
64 |
|
65 |
except Exception as e:
|