Ali2206 committed on
Commit
936692d
·
verified ·
1 Parent(s): b20bb52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -42,11 +42,21 @@ def clean_response(text: str) -> str:
42
  def extract_text_from_excel(path: str) -> str:
43
  all_text = []
44
  xls = pd.ExcelFile(path)
45
- for sheet in xls.sheet_names:
46
- df = xls.parse(sheet).astype(str).fillna("").drop_duplicates()
47
- df = df[~df.apply(lambda x: x.str.len().le(5)).any(axis=1)] # remove very short rows
48
- rows = df.apply(lambda row: " | ".join(row), axis=1)
49
- all_text += [f"[{sheet}] {line}" for line in rows if line.strip()]
 
 
 
 
 
 
 
 
 
 
50
  return "\n".join(all_text)
51
 
52
  def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]:
 
42
  def extract_text_from_excel(path: str) -> str:
43
  all_text = []
44
  xls = pd.ExcelFile(path)
45
+
46
+ for sheet_name in xls.sheet_names:
47
+ try:
48
+ df = xls.parse(sheet_name).astype(str).fillna("")
49
+ except Exception:
50
+ continue # Skip sheet if unreadable
51
+
52
+ for idx, row in df.iterrows():
53
+ # If the row has at least 2 non-empty values and is not totally empty
54
+ non_empty = [cell.strip() for cell in row if cell.strip() != ""]
55
+ if len(non_empty) >= 2:
56
+ text_line = " | ".join(non_empty)
57
+ if len(text_line) > 15: # Ignore very small lines
58
+ all_text.append(f"[{sheet_name}] {text_line}")
59
+
60
  return "\n".join(all_text)
61
 
62
  def split_text(text: str, max_tokens=MAX_CHUNK_TOKENS) -> List[str]: