import os
import subprocess
import sys
import traceback
from typing import Any, Dict, List, Optional

import pandas as pd
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.readers.file import (
    CSVReader,
    PandasCSVReader,
    PandasExcelReader,
)

# File extensions accepted by ``csv_excel_reader``.
_EXCEL_EXTENSIONS = (".xlsx", ".xls")
_CSV_EXTENSION = ".csv"


def _execution_result(
    success: bool,
    output: Optional[str] = None,
    error: Optional[str] = None,
    stderr: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the result dictionary returned by ``execute_python_file``."""
    # Every code path returns the same four keys so callers never need to
    # probe for the presence of "stderr".
    return {
        "success": success,
        "error": error,
        "stderr": stderr,
        "output": output,
    }


def _as_text(stream: Any) -> Optional[str]:
    """Return a captured stream as ``str``, decoding it if it is ``bytes``."""
    if isinstance(stream, bytes):
        return stream.decode(errors="replace")
    return stream


def execute_python_file(
    file_path: str, timeout: Optional[float] = None
) -> Dict[str, Any]:
    """
    Execute a Python file in a child process and return its output.

    The file runs with the privileges of the current process, so it should
    only be pointed at trusted scripts.

    Args:
        file_path: Path to the Python file to execute
        timeout: Optional maximum run time in seconds. ``None`` (the default)
            waits for the script indefinitely.

    Returns:
        Dictionary with the keys ``success`` (bool), ``error`` (message or
        ``None``), ``stderr`` (captured standard error or ``None``) and
        ``output`` (stripped standard output, or ``None`` on failure).
    """
    if not os.path.exists(file_path):
        return _execution_result(False, error=f"File not found at {file_path}")

    # Prefer the interpreter running this process so the script sees the same
    # installed packages. In a frozen application sys.executable is the
    # application itself, so fall back to whatever "python3" is on PATH.
    if sys.executable and not getattr(sys, "frozen", False):
        interpreter = sys.executable
    else:
        interpreter = "python3"

    try:
        completed = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True,  # non-zero exit status raises CalledProcessError
            timeout=timeout,
        )
    except subprocess.CalledProcessError as exc:
        return _execution_result(
            False,
            error=f"Execution error: {exc}",
            stderr=exc.stderr,
        )
    except subprocess.TimeoutExpired as exc:
        return _execution_result(
            False,
            error=f"Execution timed out after {timeout} seconds",
            stderr=_as_text(exc.stderr),
        )
    except Exception as exc:
        # Tool boundary: report launch failures (missing interpreter,
        # permission problems, ...) as data instead of raising.
        return _execution_result(False, error=f"Error: {str(exc)}")

    return _execution_result(
        True,
        output=completed.stdout.strip(),
        stderr=completed.stderr,
    )


# Create a function tool for executing Python files
execute_python_file_tool = FunctionTool.from_defaults(
    name="execute_python_file",
    description="Execute a Python file and return its output.",
    fn=execute_python_file
)


def csv_excel_reader(file_path: str) -> List[Document]:
    """
    Read a CSV or Excel file with pandas and wrap its contents in Documents.

    A CSV file yields a single Document. An Excel workbook yields one
    Document per sheet, each tagged with its sheet name. The text of every
    Document is the dataframe rendered with ``DataFrame.to_string``.

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the file cannot be parsed or has an unsupported extension
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    file_ext = os.path.splitext(file_path)[1].lower()

    # Reject unknown extensions before entering the try block so this error is
    # not re-wrapped (with a traceback) by the generic handler below.
    if file_ext != _CSV_EXTENSION and file_ext not in _EXCEL_EXTENSIONS:
        raise ValueError(
            f"Unsupported file extension: {file_ext}. "
            "Supported extensions are .csv, .xlsx, and .xls"
        )

    filename = os.path.basename(file_path)

    try:
        if file_ext == _CSV_EXTENSION:
            frame = pd.read_csv(file_path)
            return [
                Document(
                    text=frame.to_string(index=False),
                    metadata={
                        "source": file_path,
                        "filename": filename,
                    },
                )
            ]

        # Open the workbook once, parse each sheet from the same handle, and
        # let the context manager close the underlying file afterwards.
        with pd.ExcelFile(file_path) as workbook:
            return [
                Document(
                    text=workbook.parse(sheet_name).to_string(index=False),
                    metadata={
                        "source": file_path,
                        "sheet_name": sheet_name,
                        "filename": filename,
                    },
                )
                for sheet_name in workbook.sheet_names
            ]
    except Exception as exc:
        # Surface any parsing failure as ValueError (the documented contract),
        # keeping the full traceback in the message and chaining the cause.
        raise ValueError(
            f"Error processing file {file_path}: {str(exc)}\n"
            f"Details: {traceback.format_exc()}"
        ) from exc


# Create a function tool for CSV/Excel reading
csv_excel_reader_tool = FunctionTool.from_defaults(
    name="csv_excel_reader",
    description=(
        "Reads CSV or Excel files and returns them as Document objects. "
        "Directly uses pandas to read the files rather than relying on "
        "SimpleDirectoryReader."
    ),
    fn=csv_excel_reader
)