|
import subprocess |
|
import os |
|
from typing import Optional, Dict, Any |
|
from llama_index.core.tools import FunctionTool |
|
from llama_index.core import SimpleDirectoryReader |
|
from llama_index.readers.file import ( |
|
PandasCSVReader, |
|
CSVReader, |
|
PandasExcelReader |
|
) |
|
import pandas as pd |
|
from llama_index.core import Document |
|
|
|
|
|
def execute_python_file(file_path: str) -> Dict[str, Any]:
    """
    Execute a Python file in a child process and return its output.

    The script is run with the interpreter that is running this module
    (``sys.executable``), so it sees the same environment and installed
    packages; ``"python3"`` is used only if that path is unavailable.

    NOTE: this runs whatever code the file contains, with the privileges of
    the current process. No timeout is applied, so a script that never exits
    blocks the caller.

    Args:
        file_path: Path to the Python file to execute

    Returns:
        Dictionary with the keys:
            "success": True if the script exited with status 0, else False
            "error": None on success, otherwise a description of the failure
            "output": the script's stripped stdout on success, otherwise None
            "stderr": the script's captured stderr (present only when the
                script exited with a non-zero status)
    """
    import sys

    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "output": None
        }

    # Prefer the running interpreter over whatever "python3" resolves to on
    # PATH (it may be missing, or belong to a different environment).
    interpreter = sys.executable or "python3"

    try:
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True  # non-zero exit status raises CalledProcessError
        )
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "stderr": e.stderr,
            "output": None
        }
    except Exception as e:
        # e.g. the interpreter could not be launched at all
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "output": None
        }

    return {
        "success": True,
        "error": None,
        "output": result.stdout.strip()
    }
|
|
|
|
|
|
|
# Wrap execute_python_file in a FunctionTool under the same name.
execute_python_file_tool = FunctionTool.from_defaults(
    fn=execute_python_file,
    name="execute_python_file",
    description="Execute a Python file and return its output.",
)
|
|
|
|
|
def csv_excel_reader(file_path: str) -> list:
    """
    Read a CSV or Excel file with pandas and wrap its contents in Documents.

    Each table is rendered with ``DataFrame.to_string(index=False)`` and used
    as the Document text. A CSV file yields a single Document; an Excel
    workbook yields one Document per sheet, in workbook order.

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file.
            Every Document carries "source" and "filename" metadata; Excel
            Documents additionally carry "sheet_name".

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the extension is not .csv, .xlsx or .xls, or if
            reading/parsing the file fails (the original exception is
            chained and its traceback is included in the message)
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    file_ext = os.path.splitext(file_path)[1].lower()

    # Validate the extension before entering the try block so this error is
    # reported as-is instead of being re-wrapped by the handler below.
    if file_ext not in ('.csv', '.xlsx', '.xls'):
        raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")

    filename = os.path.basename(file_path)

    try:
        if file_ext == '.csv':
            df = pd.read_csv(file_path)
            return [
                Document(
                    text=df.to_string(index=False),
                    metadata={
                        "source": file_path,
                        "filename": filename
                    }
                )
            ]

        # Open the workbook once, reuse it for every sheet, and make sure the
        # underlying file handle is closed even if a sheet fails to parse.
        with pd.ExcelFile(file_path) as excel:
            documents = []
            for sheet_name in excel.sheet_names:
                df = pd.read_excel(excel, sheet_name=sheet_name)
                documents.append(
                    Document(
                        text=df.to_string(index=False),
                        metadata={
                            "source": file_path,
                            "sheet_name": sheet_name,
                            "filename": filename
                        }
                    )
                )
            return documents

    except Exception as e:
        import traceback

        # Keep the full traceback in the message, and chain the original
        # exception so it is also available via __cause__.
        error_details = traceback.format_exc()
        raise ValueError(f"Error processing file {file_path}: {str(e)}\nDetails: {error_details}") from e
|
|
|
|
|
|
|
# Wrap csv_excel_reader in a FunctionTool under the same name.
csv_excel_reader_tool = FunctionTool.from_defaults(
    fn=csv_excel_reader,
    name="csv_excel_reader",
    description="Reads CSV or Excel files and returns them as Document objects. Directly uses pandas to read the files rather than relying on SimpleDirectoryReader.",
)