agents_course_final_assignement

Paused

File size: 4,927 Bytes

import subprocess
import os
from typing import Optional, Dict, Any
from llama_index.core.tools import FunctionTool
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import (
    PandasCSVReader,
    CSVReader,
    PandasExcelReader
)
import pandas as pd
from llama_index.core import Document


def execute_python_file(file_path: str) -> Dict[str, Any]:
    """
    Execute a Python file in a subprocess and return its captured output.

    The script is launched with the interpreter that is running this
    process (``sys.executable``) instead of whichever ``python3`` happens to
    be first on PATH, so it runs under the same Python installation as the
    caller. ``python3`` is only used as a fallback when ``sys.executable``
    is empty.

    Args:
        file_path: Path to the Python file to execute

    Returns:
        Dictionary describing the run:
            - "success": True if the process exited with status 0
            - "error": None on success, otherwise a description of the failure
            - "output": stripped stdout on success, otherwise None
            - "stderr": captured stderr (only present when the process
              exited with a non-zero status)
    """
    # Local import keeps this block self-contained without touching the
    # file-level imports.
    import sys

    # Check if file exists
    if not os.path.exists(file_path):
        return {
            "success": False,
            "error": f"File not found at {file_path}",
            "output": None
        }

    # sys.executable can be empty/None in unusual embeddings; fall back to
    # the previous behaviour in that case.
    interpreter = sys.executable or "python3"

    try:
        # check=True turns a non-zero exit status into CalledProcessError
        result = subprocess.run(
            [interpreter, file_path],
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        return {
            "success": False,
            "error": f"Execution error: {e}",
            "stderr": e.stderr,
            "output": None
        }
    except Exception as e:
        # e.g. the interpreter could not be launched at all
        return {
            "success": False,
            "error": f"Error: {str(e)}",
            "output": None
        }

    # Return the stdout output (trimmed of whitespace)
    return {
        "success": True,
        "error": None,
        "output": result.stdout.strip()
    }


# Wrap execute_python_file as a LlamaIndex FunctionTool
execute_python_file_tool = FunctionTool.from_defaults(
    fn=execute_python_file,
    name="execute_python_file",
    description="Execute a Python file and return its output.",
)


def csv_excel_reader(file_path: str) -> list:
    """
    Read a CSV or Excel file with pandas and return it as Document objects.

    A ``.csv`` file produces a single Document. An ``.xlsx``/``.xls``
    workbook produces one Document per sheet, in workbook order. In both
    cases the Document text is the table rendered with
    ``DataFrame.to_string(index=False)``.

    Args:
        file_path (str): Path to the CSV or Excel file to be read

    Returns:
        list: Document objects containing the parsed data from the file.
            Every Document carries "source" and "filename" metadata; Excel
            Documents additionally carry "sheet_name".

    Raises:
        FileNotFoundError: If the specified file doesn't exist
        ValueError: If the file has an unsupported extension, or if reading
            or converting it fails (the message then includes the original
            error and its traceback, and the original exception is chained)
    """
    # Check if file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found at {file_path}")

    # Get file extension (compared case-insensitively)
    file_ext = os.path.splitext(file_path)[1].lower()

    # Validate the extension before entering the try block so this error is
    # reported as-is instead of being re-wrapped by the catch-all below.
    if file_ext not in ('.csv', '.xlsx', '.xls'):
        raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")

    filename = os.path.basename(file_path)

    try:
        if file_ext == '.csv':
            # Read CSV file directly with pandas
            df = pd.read_csv(file_path)
            return [
                Document(
                    text=df.to_string(index=False),
                    metadata={
                        "source": file_path,
                        "filename": filename
                    }
                )
            ]

        # Excel: open the workbook once, parse every sheet from the same
        # handle, and let the context manager close it afterwards.
        documents = []
        with pd.ExcelFile(file_path) as excel:
            for sheet_name in excel.sheet_names:
                df = excel.parse(sheet_name)
                documents.append(
                    Document(
                        text=df.to_string(index=False),
                        metadata={
                            "source": file_path,
                            "sheet_name": sheet_name,
                            "filename": filename
                        }
                    )
                )
        return documents

    except Exception as e:
        import traceback

        # Keep the traceback text in the message (existing behaviour) and
        # chain the original exception so it is not lost.
        raise ValueError(
            f"Error processing file {file_path}: {str(e)}\nDetails: {traceback.format_exc()}"
        ) from e


# Wrap csv_excel_reader as a LlamaIndex FunctionTool
csv_excel_reader_tool = FunctionTool.from_defaults(
    fn=csv_excel_reader,
    name="csv_excel_reader",
    description="Reads CSV or Excel files and returns them as Document objects. Directly uses pandas to read the files rather than relying on SimpleDirectoryReader.",
)