Update app.py
Browse files
app.py
CHANGED
@@ -42,11 +42,21 @@ def clean_response(text: str) -> str:
|
|
42 |
def extract_text_from_excel(path: str) -> str:
|
43 |
all_text = []
|
44 |
xls = pd.ExcelFile(path)
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
return "\n".join(all_text)
|
51 |
|
52 |
def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
|
|
|
42 |
def extract_text_from_excel(path: str) -> str:
    """Flatten an Excel workbook into newline-separated text lines.

    Every sheet is parsed with pandas' default settings (so the first row of
    a sheet is consumed as column headers and is not emitted). Each data row
    is reduced to its non-blank cells; rows with at least two such cells are
    joined with " | ", and lines longer than 15 characters are kept, prefixed
    with "[<sheet name>] ".

    Args:
        path: Location of the workbook to read.

    Returns:
        The kept lines joined by newlines, or an empty string when no row
        qualifies.

    Raises:
        Whatever ``pd.ExcelFile`` raises when the workbook itself cannot be
        opened. Failures on an individual sheet are logged and that sheet is
        skipped.
    """
    import logging  # local import keeps this block self-contained

    all_text = []

    # The context manager closes the workbook handle even if a sheet fails.
    with pd.ExcelFile(path) as xls:
        for sheet_name in xls.sheet_names:
            try:
                raw = xls.parse(sheet_name)
                # Blank out missing cells using the mask taken *before*
                # stringifying: astype(str) renders NaN as the literal text
                # "nan", which a later fillna("") can no longer detect.
                df = raw.astype(str).mask(raw.isna(), "")
            except Exception:
                # Best effort: one unreadable sheet must not abort the whole
                # workbook, but leave a trace so the skip is diagnosable.
                logging.getLogger(__name__).warning(
                    "Skipping unreadable sheet %r in %s",
                    sheet_name,
                    path,
                    exc_info=True,
                )
                continue

            for row in df.itertuples(index=False, name=None):
                stripped = (cell.strip() for cell in row)
                non_empty = [cell for cell in stripped if cell]
                # A single populated cell carries too little context.
                if len(non_empty) < 2:
                    continue
                text_line = " | ".join(non_empty)
                if len(text_line) > 15:  # Ignore very small lines
                    all_text.append(f"[{sheet_name}] {text_line}")

    return "\n".join(all_text)
|
61 |
|
62 |
def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
|