CPS-Test-Mobile

Paused

Ali2206 committed 26 days ago

Commit

936692d

verified ·

1 Parent(s): b20bb52

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,11 +42,21 @@ def clean_response(text: str) -> str:
 def extract_text_from_excel(path: str) -> str:
     all_text = []
     xls = pd.ExcelFile(path)
-    for sheet in xls.sheet_names:
-        df = xls.parse(sheet).astype(str).fillna("").drop_duplicates()
-        df = df[~df.apply(lambda x: x.str.len().le(5)).any(axis=1)]  # remove very short rows
-        rows = df.apply(lambda row: " | ".join(row), axis=1)
-        all_text += [f"[{sheet}] {line}" for line in rows if line.strip()]
     return "\n".join(all_text)
 def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:

 def extract_text_from_excel(path: str) -> str:
     """Flatten every readable sheet of an Excel workbook into plain text.

     Each informative row becomes one line of the form
     ``[sheet name] cell | cell | ...``; the lines of all sheets are joined
     with newlines in workbook order.

     Args:
         path: Location of the workbook, passed straight to ``pd.ExcelFile``.

     Returns:
         The newline-joined text, or ``""`` when no row qualifies.

     Raises:
         Whatever ``pd.ExcelFile`` raises when the workbook itself cannot be
         opened. A failure on an individual sheet is logged and that sheet
         is skipped.
     """
     # Function-scope import: the file's import block is outside this view.
     import logging

     all_text: List[str] = []
     # The context manager closes the workbook handle even if a sheet fails.
     with pd.ExcelFile(path) as xls:
         for sheet_name in xls.sheet_names:
             try:
                 df = xls.parse(sheet_name)
                 all_text.extend(_sheet_rows_to_lines(df, sheet_name))
             except Exception:
                 # Best effort per sheet: keep going, but leave a trace
                 # instead of dropping the sheet silently.
                 logging.getLogger(__name__).warning(
                     "Skipping unreadable sheet %r in %s",
                     sheet_name,
                     path,
                     exc_info=True,
                 )
     return "\n".join(all_text)


 def _sheet_rows_to_lines(
     df: "pd.DataFrame",
     sheet_name: str,
     min_cells: int = 2,
     min_line_len: int = 15,
 ) -> List[str]:
     """Render the informative rows of one sheet as ``[sheet] a | b`` lines.

     A row is kept when it has at least ``min_cells`` non-blank cells and its
     joined text is longer than ``min_line_len`` characters.
     """
     # Stringify first so each dtype keeps pandas' text form, then blank the
     # cells that were missing in the original frame. Calling fillna after
     # astype(str) does nothing: the missing values are already the literal
     # strings "nan"/"None"/"NaT" and would be counted as real content.
     text_df = df.astype(str).mask(df.isna(), "")
     lines: List[str] = []
     for row in text_df.itertuples(index=False, name=None):
         stripped = (cell.strip() for cell in row)
         cells = [cell for cell in stripped if cell]
         if len(cells) < min_cells:
             continue
         line = " | ".join(cells)
         if len(line) > min_line_len:
             lines.append(f"[{sheet_name}] {line}")
     return lines
 def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]: