File size: 4,927 Bytes
41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 ab81a57 41cae26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import os
import subprocess
import sys
import traceback
from typing import Optional, Dict, Any

import pandas as pd
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.readers.file import (
    PandasCSVReader,
    CSVReader,
    PandasExcelReader
)
def execute_python_file(file_path: str, timeout: Optional[float] = None) -> Dict[str, Any]:
    """
    Execute a Python file in a subprocess and return its output.

    The script is run with the interpreter that is running this module
    (``sys.executable``), falling back to ``python3`` on PATH when that
    value is empty.

    Args:
        file_path: Path to the Python file to execute
        timeout: Optional limit in seconds for the child process; ``None``
            (the default) waits indefinitely

    Returns:
        Dictionary with the keys:
            "success": True when the script exited with status 0
            "error": None on success, otherwise a description of the failure
            "stderr": captured standard error of the script, or None when
                the script was never started or its stderr is unavailable
            "output": stripped standard output on success, otherwise None
    """
    # Only regular files can be executed; a directory would otherwise be
    # handed to the interpreter and fail with an opaque execution error.
    if not os.path.isfile(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "stderr": None,
            "output": None,
        }

    # Prefer the current interpreter so the script sees the same environment.
    interpreter = sys.executable or "python3"

    try:
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True,  # non-zero exit status raises CalledProcessError
            timeout=timeout,
        )
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "stderr": e.stderr,
            "output": None,
        }
    except Exception as e:
        # Boundary handler: launch failures, timeouts, decoding problems, ...
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "stderr": None,
            "output": None,
        }

    return {
        "success": True,
        "error": None,
        "stderr": result.stderr,
        "output": result.stdout.strip(),
    }
# Wrap execute_python_file as a LlamaIndex FunctionTool.
execute_python_file_tool = FunctionTool.from_defaults(
    fn=execute_python_file,
    name="execute_python_file",
    description="Execute a Python file and return its output.",
)
def csv_excel_reader(file_path: str) -> list:
    """
    Read a CSV or Excel file with pandas and wrap its contents in Documents.

    A CSV file yields a single Document. An Excel workbook yields one
    Document per sheet, in workbook order. Each Document's text is the
    DataFrame rendered with ``to_string(index=False)``.

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file.
            Metadata always has "source" and "filename"; Excel documents
            additionally carry "sheet_name".

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the extension is not .csv, .xlsx or .xls, or if the
            file cannot be parsed (the original exception is chained)
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    file_ext = os.path.splitext(file_path)[1].lower()

    # Validate the extension before entering the try block so this error
    # reaches the caller as-is instead of being re-wrapped as a parse error.
    if file_ext not in ('.csv', '.xlsx', '.xls'):
        raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")

    filename = os.path.basename(file_path)

    try:
        if file_ext == '.csv':
            df = pd.read_csv(file_path)
            return [
                Document(
                    text=df.to_string(index=False),
                    metadata={
                        "source": file_path,
                        "filename": filename,
                    },
                )
            ]

        documents = []
        # Open the workbook once and close it deterministically; every sheet
        # is read from the same handle rather than re-opening the file.
        with pd.ExcelFile(file_path) as excel:
            for sheet_name in excel.sheet_names:
                df = pd.read_excel(excel, sheet_name=sheet_name)
                documents.append(
                    Document(
                        text=df.to_string(index=False),
                        metadata={
                            "source": file_path,
                            "sheet_name": sheet_name,
                            "filename": filename,
                        },
                    )
                )
        return documents
    except Exception as e:
        # Keep the traceback text in the message (existing behaviour) and
        # chain the cause so the original exception is not lost.
        raise ValueError(
            f"Error processing file {file_path}: {str(e)}\nDetails: {traceback.format_exc()}"
        ) from e
# Wrap csv_excel_reader as a LlamaIndex FunctionTool.
csv_excel_reader_tool = FunctionTool.from_defaults(
    name="csv_excel_reader",
    description="Reads CSV or Excel files and returns them as Document objects. Directly uses pandas to read the files rather than relying on SimpleDirectoryReader.",
    fn=csv_excel_reader
)