drAbreu's picture
Fixed bugs in Excel handling and added openpyxl dependency
ab81a57
import os
import subprocess
import sys
import traceback
from typing import Optional, Dict, Any

import pandas as pd
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.readers.file import (
    PandasCSVReader,
    CSVReader,
    PandasExcelReader,
)
def execute_python_file(file_path: str) -> Dict[str, Any]:
    """
    Execute a Python file in a subprocess and return its captured output.

    The script is run with the interpreter that is running this module
    (``sys.executable``); ``python3`` from PATH is used only as a fallback
    when that value is empty.

    Args:
        file_path: Path to the Python file to execute

    Returns:
        Dictionary with the same four keys on every code path:
            success: True if the script exited with status 0, else False
            error: Description of the failure, or None on success
            stderr: Text the script wrote to stderr, or None if the
                script was never started
            output: The script's stdout stripped of surrounding
                whitespace, or None on failure
    """
    # Check if file exists
    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "stderr": None,
            "output": None,
        }

    # sys.executable may be empty (e.g. embedded interpreters); fall back
    # to looking up python3 on PATH in that case.
    interpreter = sys.executable or "python3"

    try:
        # check=True turns a non-zero exit status into CalledProcessError
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # The script ran but failed; surface what it printed to stderr
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "stderr": e.stderr,
            "output": None,
        }
    except Exception as e:
        # The script could not be started at all (e.g. interpreter missing)
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "stderr": None,
            "output": None,
        }

    # Return the stdout output (trimmed of whitespace)
    return {
        "success": True,
        "error": None,
        "stderr": result.stderr,
        "output": result.stdout.strip(),
    }
# Wrap execute_python_file as a LlamaIndex FunctionTool
execute_python_file_tool = FunctionTool.from_defaults(
    fn=execute_python_file,
    name="execute_python_file",
    description="Execute a Python file and return its output.",
)
def csv_excel_reader(file_path: str) -> list:
    """
    Read a CSV or Excel file with pandas and wrap the contents in Documents.

    A CSV file yields a single Document. An Excel workbook yields one
    Document per sheet, in workbook order. In both cases the Document text
    is the DataFrame rendered with ``to_string(index=False)``.

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file.
            Each carries "source" and "filename" metadata; Excel documents
            additionally carry "sheet_name".

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the file has an unsupported extension, or if reading
            or parsing it fails (the original exception is chained as the
            cause and its traceback is embedded in the message)
    """
    # Check if file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    # Validate the extension up front so this error is raised as-is rather
    # than being caught and re-wrapped by the handler below.
    file_ext = os.path.splitext(file_path)[1].lower()
    if file_ext not in ('.csv', '.xlsx', '.xls'):
        raise ValueError(
            f"Unsupported file extension: {file_ext}. "
            "Supported extensions are .csv, .xlsx, and .xls"
        )

    filename = os.path.basename(file_path)

    try:
        if file_ext == '.csv':
            df = pd.read_csv(file_path)
            return [
                Document(
                    text=df.to_string(index=False),
                    metadata={
                        "source": file_path,
                        "filename": filename,
                    },
                )
            ]

        # Excel: open the workbook once, parse each sheet from the open
        # handle, and let the context manager close it afterwards.
        documents = []
        with pd.ExcelFile(file_path) as excel:
            for sheet_name in excel.sheet_names:
                df = excel.parse(sheet_name=sheet_name)
                documents.append(
                    Document(
                        text=df.to_string(index=False),
                        metadata={
                            "source": file_path,
                            "sheet_name": sheet_name,
                            "filename": filename,
                        },
                    )
                )
        return documents
    except Exception as e:
        # The traceback text is kept in the message so it is visible to
        # whoever only sees the stringified error; `from e` preserves the
        # original exception as __cause__.
        raise ValueError(
            f"Error processing file {file_path}: {str(e)}\nDetails: {traceback.format_exc()}"
        ) from e
# Wrap csv_excel_reader as a LlamaIndex FunctionTool
csv_excel_reader_tool = FunctionTool.from_defaults(
    fn=csv_excel_reader,
    name="csv_excel_reader",
    description="Reads CSV or Excel files and returns them as Document objects. Directly uses pandas to read the files rather than relying on SimpleDirectoryReader.",
)