File size: 4,927 Bytes
41cae26
 
 
 
 
 
 
 
ab81a57
41cae26
ab81a57
 
41cae26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab81a57
41cae26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab81a57
41cae26
 
ab81a57
 
 
41cae26
ab81a57
 
 
41cae26
ab81a57
 
41cae26
ab81a57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41cae26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab81a57
41cae26
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
import subprocess
import sys
from typing import Optional, Dict, Any

import pandas as pd
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.tools import FunctionTool
from llama_index.readers.file import (
    PandasCSVReader,
    CSVReader,
    PandasExcelReader
)


def execute_python_file(file_path: str, timeout: Optional[float] = None) -> Dict[str, Any]:
    """
    Execute a Python file in a subprocess and return its output.

    The script is run with the interpreter that is running this process
    (``sys.executable``) so it sees the same environment and installed
    packages; ``python3`` from PATH is used only when that path is empty.

    Args:
        file_path: Path to the Python file to execute
        timeout: Optional limit in seconds for the run. ``None`` (the
            default) waits for the script indefinitely.

    Returns:
        Dictionary with the keys "success" (bool), "error" (message or None)
        and "output" (stdout stripped of surrounding whitespace, or None).
        When the script exits with a non-zero status the dictionary also
        carries "stderr" with the captured error stream.
    """
    # Check if file exists
    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "output": None
        }

    # A directory exists but cannot be executed as a script; report it
    # directly instead of letting the interpreter fail on it.
    if not os.path.isfile(file_path):
        return {
            "success": False,
            "error": f"Not a file: {file_path}",
            "output": None
        }

    # sys.executable can be empty (e.g. some embedded interpreters).
    interpreter = sys.executable or "python3"

    try:
        # Execute the Python file and capture output; check=True turns a
        # non-zero exit status into CalledProcessError.
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout
        )
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "stderr": e.stderr,
            "output": None
        }
    except subprocess.TimeoutExpired:
        # subprocess.run has already killed the child at this point.
        return {
            "success": False,
            "error": f"Execution timed out after {timeout} seconds",
            "output": None
        }
    except Exception as e:
        # Boundary for an agent tool: report the failure instead of raising.
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "output": None
        }

    # Return the stdout output (trimmed of whitespace)
    return {
        "success": True,
        "error": None,
        "output": result.stdout.strip()
    }


# Wrap execute_python_file as a LlamaIndex FunctionTool
execute_python_file_tool = FunctionTool.from_defaults(
    name="execute_python_file",
    description="Execute a Python file and return its output.",
    fn=execute_python_file
)


def _frame_to_document(df, file_path: str, sheet_name: Optional[str] = None) -> "Document":
    """Render a DataFrame as plain text and wrap it in a Document with source metadata."""
    # Key order is kept as source / sheet_name / filename.
    metadata = {"source": file_path}
    if sheet_name is not None:
        metadata["sheet_name"] = sheet_name
    metadata["filename"] = os.path.basename(file_path)
    return Document(text=df.to_string(index=False), metadata=metadata)


def csv_excel_reader(file_path: str) -> list:
    """
    Read a CSV or Excel file with pandas and return it as Document objects.

    A CSV file yields a single Document. An Excel workbook yields one
    Document per sheet, with the sheet name recorded in the metadata. The
    text of each Document is the DataFrame rendered with
    ``to_string(index=False)``.

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the extension is not .csv, .xlsx or .xls, or if the
            file cannot be parsed (the original exception is chained as
            ``__cause__``)
    """
    # Check if file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    # Validate the extension up front so this error is raised as-is rather
    # than being re-wrapped by the parsing error handler below.
    file_ext = os.path.splitext(file_path)[1].lower()
    if file_ext not in ('.csv', '.xlsx', '.xls'):
        raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")

    try:
        if file_ext == '.csv':
            return [_frame_to_document(pd.read_csv(file_path), file_path)]

        # Open the workbook once, parse every sheet from that handle, and
        # close it when done.
        with pd.ExcelFile(file_path) as excel:
            return [
                _frame_to_document(excel.parse(sheet_name), file_path, sheet_name)
                for sheet_name in excel.sheet_names
            ]
    except Exception as e:
        # pandas and its engines raise many exception types; surface them all
        # as ValueError and keep the traceback through chaining.
        raise ValueError(f"Error processing file {file_path}: {str(e)}") from e


# Wrap csv_excel_reader as a LlamaIndex FunctionTool
csv_excel_reader_tool = FunctionTool.from_defaults(
    fn=csv_excel_reader,
    name="csv_excel_reader",
    description=(
        "Reads CSV or Excel files and returns them as Document objects. "
        "Directly uses pandas to read the files rather than relying on SimpleDirectoryReader."
    ),
)