Spaces:
Running
Running
# tools/file_parser.py
import pandas as pd | |
import os | |
def parse_file_and_summarize(file_path: str, query: str = "") -> str:
    """
    Read a CSV or Excel file and optionally answer a simple question about it.

    With no query, a short overview (row/column counts and column names) is
    returned. The only query currently recognised is one containing both
    "total" and "food": it sums the ``sales`` column over the rows whose
    ``category`` equals "food" (case-insensitive, surrounding whitespace
    ignored). Non-numeric ``sales`` values are skipped rather than aborting
    the whole computation.

    Args:
        file_path (str): Path to the file (.csv, .xls or .xlsx).
        query (str): Optional freeform instruction (e.g. "total food sales").
            An empty, whitespace-only or ``None`` query yields the overview.

    Returns:
        str: Summary or result from the file, or a human-readable error
        message. Failures are reported through the return value, not raised.
    """
    # This function is a tool boundary: callers expect a string back in every
    # case, so any unexpected failure is converted into an error message.
    try:
        # Lower-case only for the extension comparison; the original path is
        # what gets opened (matters on case-sensitive file systems).
        _, ext = os.path.splitext(file_path.lower())
        if ext == ".csv":
            df = pd.read_csv(file_path)
        elif ext in (".xls", ".xlsx"):
            df = pd.read_excel(file_path)
        else:
            return "Unsupported file format. Please upload CSV or Excel."

        if df.empty:
            return "The file is empty or unreadable."

        normalized_query = (query or "").strip().lower()
        if not normalized_query:
            # Column labels are not guaranteed to be strings (e.g. numeric
            # headers in a spreadsheet), so stringify them before joining.
            column_names = ", ".join(map(str, df.columns))
            return (
                f"Loaded file with {df.shape[0]} rows and {df.shape[1]} columns."
                f"\nColumns: {column_names}"
            )

        # Very basic natural language query handling (expand with LLM if needed)
        if "total" in normalized_query and "food" in normalized_query:
            # Check both required columns up front so a missing one produces a
            # clear message instead of a KeyError surfacing as a parsing error.
            if "category" not in df.columns:
                return "Could not find 'category' column in the file."
            if "sales" not in df.columns:
                return "Could not find 'sales' column in the file."

            # astype(str) keeps the comparison safe for numeric/mixed columns;
            # missing values become "nan" and simply do not match "food".
            categories = df["category"].astype(str).str.strip().str.lower()
            food_rows = df[categories == "food"]

            # Coerce so stray text in the sales column becomes NaN (ignored by
            # sum) instead of breaking the numeric format below.
            total = pd.to_numeric(food_rows["sales"], errors="coerce").sum()
            return f"Total food sales: ${total:.2f}"

        return "Query not supported. Please specify a clearer question."
    except Exception as e:
        return f"File parsing error: {e}"