drAbreu committed
Commit 41cae26 · 1 Parent(s): f08e4fa

Added code execution, Excel, and audio capabilities to the agent

.DS_Store ADDED
Binary file (6.15 kB)
 
.gitignore CHANGED
@@ -9,4 +9,8 @@ agents/__pycache__
 
 *.pyc
 
-.mypy_cache
+.mypy_cache
+
+testing_implementation.ipynb
+
+.DS_Store
agents/llama_index_agent.py CHANGED
@@ -1,6 +1,7 @@
 from llama_index.core.agent.workflow import (
     ReActAgent,
-    FunctionAgent
+    FunctionAgent,
+    CodeActAgent
 )
 from llama_index.core.llms import LLM
 import os
@@ -16,6 +17,12 @@ from tools.web_tools import (
     tavily_tool,
     wikipedia_tool
 )
+
+from tools.coding_tools import (
+    execute_python_file_tool,
+    csv_excel_reader_tool
+)
+
 class GaiaAgent(ReActAgent):
     """
     A flexible ReActAgent for GAIA benchmark tasks that supports multiple LLM providers.
@@ -64,6 +71,8 @@ class GaiaAgent(ReActAgent):
             wikipedia_tool.search_data,
             tavily_tool.search,
             transcribe_audio_tool,
+            execute_python_file_tool,
+            csv_excel_reader_tool
         ]
 
         # Use default system prompt if not provided
@@ -110,56 +119,6 @@ class GaiaAgent(ReActAgent):
             raise ValueError(f"Unsupported model provider: {model_provider}. "
                              f"Supported providers are: openai, anthropic")
 
-    def _get_default_system_prompt_legacy(self) -> str:
-        """Return the default system prompt for GAIA benchmark tasks."""
-        return """
-        You are the lead coordinator for a team of specialized AI agents tackling the GAIA benchmark. Your job is to analyze each question with extreme precision, determine the exact format required for the answer, break the task into logical steps, and either solve it yourself or delegate to the appropriate specialized agents.
-
-        ## QUESTION ANALYSIS PROCESS
-        1. First, carefully read and parse the entire question
-        2. Identify the EXACT output format required (single word, name, number, comma-separated list, etc.)
-        3. Note any special formatting requirements (alphabetical order, specific notation, etc.)
-        4. Identify what type of task this is (research, audio analysis, video analysis, code execution, data analysis, etc.)
-        5. Break the question into sequential steps
-
-        ## DELEGATION GUIDELINES
-        - video_analyst: Use for all YouTube video analysis, visual content identification, or scene description
-        - audio_analyst: Use for transcribing audio files, identifying speakers, or extracting information from recordings
-        - researcher: Use for factual queries, literature searches, finding specific information in papers or websites
-        - code_analyst: Use for executing, debugging or analyzing code snippets
-        - excel_analyst: Use for analyzing spreadsheets, calculating values, or extracting data from Excel files
-
-        ## CRITICAL RESPONSE RULES
-        - NEVER include explanations in your final answer
-        - NEVER include phrases like "the answer is" or "the result is"
-        - Return EXACTLY what was asked for - no more, no less
-        - If asked for a name, return ONLY the name
-        - If asked for a number, return ONLY the number
-        - If asked for a list, format it EXACTLY as specified (comma-separated, alphabetical, etc.)
-        - Double-check your answer against the exact output requirements before submitting
-
-        ## EXAMPLES OF PROPER RESPONSES:
-        Question: "What is the first name of the scientist who discovered penicillin?"
-        Correct answer: Alexander
-
-        Question: "List the prime numbers between 10 and 20 in ascending order."
-        Correct answer: 11, 13, 17, 19
-
-        Question: "If you understand this sentence, write the opposite of the word 'right' as the answer."
-        Correct answer: left
-
-        Question: "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?"
-        Correct answer: 572
-
-        For questions with reverse text:
-        1. Use your reverse_text_tool to process the text
-        2. Understand the instruction in the reversed text
-        3. Follow the instruction exactly
-
-        After you have the final answer, verify one last time that it meets ALL formatting requirements from the question before submitting.
-
-        IMPORTANT: Your value is in providing PRECISELY what was asked for - not in showing your work or explaining how you got there.
-        """
 
     def _get_default_system_prompt(self) -> str:
         """Return the default system prompt for GAIA benchmark tasks."""
@@ -180,18 +139,32 @@ class GaiaAgent(ReActAgent):
            - search tools (wikipedia_tool, tavily_tool): For finding information
            - transcribe_audio: For transcribing audio files (provide the path to the audio file)
            - get_audio_metadata: For getting metadata about audio files
+           - execute_python_file: For executing Python code files and returning their output
         3. Document your full analysis, including all key facts, calculations, and relevant information
         4. Clearly identify what you believe the correct answer is
         5. Be extremely explicit about the required formatting for the final answer
 
+        ## HANDLING CODE EXECUTION TASKS
+        When dealing with Python code files:
+        1. Check if a Python file path is available in the context's "file_name" field
+        2. Always use the execute_python_file tool with the exact file path provided
+        3. Extract the specific numeric output requested from the execution result
+        4. For code tasks, ensure you've captured the final numeric output exactly as printed by the code
+
         ## HANDLING AUDIO TASKS
         When dealing with audio files:
        1. Check if an audio file path is available in the context's "audio_file_path" field
        2. Always use the transcribe_audio tool with the exact file path provided in the context
        3. Extract the specific information requested from the transcript (e.g., ingredients, page numbers, names)
-        4. Follow any special formatting instructions (e.g., comma-separated list, alphabetical order)
-        5. Make sure to provide exactly what is asked for (e.g., "only list ingredients, not measurements")
-        6. For audio tasks, ensure you've captured all relevant spoken content, including names, facts, or quotes as needed
+        4. For audio tasks, ensure you've captured all relevant spoken content, including names, facts, or quotes as needed
+
+        ## HANDLING CSV OR EXCEL DATA TASKS
+        When dealing with CSV files or data analysis tasks:
+        1. Check if a CSV file path is mentioned in the question or available in the context
+        2. Use the csv_reader tool with the specific CSV file path
+        3. Once the data is loaded, analyze it according to the question requirements
+        4. For data analysis tasks, ensure you've properly processed the CSV data and extracted the requested information
+        5. When calculations or statistics are needed, perform them accurately and document your methodology
 
         ## DELEGATION TO WRITER AGENT
         After completing your analysis, ALWAYS delegate the final answer preparation to the writer_agent with:
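For orientation, the net effect of this change on the agent itself is two extra entries in its default tool list. A minimal sketch of the assembled list after the commit (the surrounding constructor code is paraphrased, not quoted from the diff):

    # Sketch: default tool list inside GaiaAgent after this commit
    tools = [
        wikipedia_tool.search_data,    # Wikipedia search
        tavily_tool.search,            # Tavily web search
        transcribe_audio_tool,         # Whisper transcription
        execute_python_file_tool,      # new: run a .py file and return its stdout
        csv_excel_reader_tool,         # new: load CSV/Excel files as Document objects
    ]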
app.py CHANGED
@@ -103,7 +103,7 @@ class BasicAgent:
         local_file_path = None
         if file_name and task_id:
             try:
-                local_file_path = self.download_task_file(task_id)
+                local_file_path = self.download_task_file(question_data)
                 print(f"Downloaded audio file to {local_file_path}")
             except Exception as e:
                 print(f"Error downloading audio file: {e}")
@@ -144,10 +144,10 @@
         print(f"Agent returning answer: {final_answer}")
         return final_answer
 
-    def download_task_file(self, task_id: str) -> str:
+    def download_task_file(self, question_data: dict) -> str:
         """Download a task file from the API and return the local file path."""
         api_url = DEFAULT_API_URL
-        file_url = f"{api_url}/files/{task_id}"
+        file_url = f"{api_url}/files/{question_data['task_id']}"
 
         print(f"Downloading file from: {file_url}")
 
@@ -160,7 +160,7 @@
             downloads_dir.mkdir(exist_ok=True)
 
             # Save the file to the downloads directory
-            file_path = downloads_dir / f"{task_id}.mp3"
+            file_path = downloads_dir / f"{question_data['file_name']}"
             with open(file_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     f.write(chunk)
@@ -170,6 +170,7 @@
             print(f"Error downloading file: {e}")
             raise
 
+
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
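The download helper now receives the whole question payload because it needs two different fields: task_id to build the URL and file_name to name the saved file (previously every download was hard-coded to <task_id>.mp3). A minimal sketch of the resulting flow, assuming question_data carries both keys and requests/Path are imported as elsewhere in app.py (the payload values and the directory name below are hypothetical):

    # Sketch only: reworked download path
    question_data = {"task_id": "abc123", "file_name": "recipe.mp3"}
    file_url = f"{DEFAULT_API_URL}/files/{question_data['task_id']}"
    downloads_dir = Path("downloads")                          # assumed directory name
    downloads_dir.mkdir(exist_ok=True)
    local_path = downloads_dir / question_data["file_name"]    # keeps the original extension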
requirements.txt CHANGED
@@ -5,4 +5,6 @@ llama-index-tools-wikipedia
 llama-index-tools-tavily-research
 llama-index-llms-anthropic
 llama-index-llms-openai
-llama-index-readers-whisper
+llama-index-readers-whisper
+llama-index-readers-file
+llama-index-readers-pandas-ai
tools/coding_tools.py ADDED
@@ -0,0 +1,161 @@
+import subprocess
+import os
+from typing import Optional, Dict, Any
+from llama_index.core.tools import FunctionTool
+from llama_index.core import SimpleDirectoryReader
+from llama_index.readers.file import (
+    PandasCSVReader,
+    CSVReader,
+)
+
+
+def execute_python_file(file_path: str) -> Dict[str, Any]:
+    """
+    Execute a Python file and return its output.
+
+    Args:
+        file_path: Path to the Python file to execute
+
+    Returns:
+        Dictionary containing the output and execution status
+    """
+    # Check if file exists
+    if not os.path.exists(file_path):
+        return {
+            "success": False,
+            "error": f"File not found at {file_path}",
+            "output": None
+        }
+
+    try:
+        # Execute the Python file and capture output
+        result = subprocess.run(
+            ["python3", file_path],  # Use python3 explicitly
+            capture_output=True,
+            text=True,
+            check=True
+        )
+
+        # Return the stdout output (trimmed of whitespace)
+        return {
+            "success": True,
+            "error": None,
+            "output": result.stdout.strip()
+        }
+    except subprocess.CalledProcessError as e:
+        return {
+            "success": False,
+            "error": f"Execution error: {e}",
+            "stderr": e.stderr,
+            "output": None
+        }
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Error: {str(e)}",
+            "output": None
+        }
+
+
+# Create a function tool for Python file execution
+execute_python_file_tool = FunctionTool.from_defaults(
+    name="execute_python_file",
+    description="Execute a Python file and return its output.",
+    fn=execute_python_file
+)
+
+
+def csv_excel_reader(file_path: str) -> list:
+    """
+    Read and parse CSV or Excel files using LlamaIndex document readers.
+
+    This function determines the file type by extension and uses the appropriate loader:
+    - For Excel files (.xlsx, .xls): Uses ExcelLoader
+    - For CSV files (.csv): Uses PandasCSVReader with fallback to CSVReader
+
+    Args:
+        file_path (str): Path to the CSV or Excel file to be read
+
+    Returns:
+        list: Document objects containing the parsed data from the file
+
+    Raises:
+        FileNotFoundError: If the specified file doesn't exist
+        ValueError: If the file cannot be parsed or has an unsupported extension
+
+    Examples:
+        >>> documents = csv_excel_reader("data/financial_report.csv")
+        >>> print(f"Loaded {len(documents)} documents")
+        >>>
+        >>> # Or with Excel files
+        >>> documents = csv_excel_reader("data/quarterly_reports.xlsx")
+        >>> print(f"Loaded {len(documents)} documents from Excel file")
+    """
+    import os
+
+    # Check if file exists
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found at {file_path}")
+
+    # Get file extension
+    file_ext = os.path.splitext(file_path)[1].lower()
+
+    # Use the appropriate loader based on file extension
+    try:
+        if file_ext in ['.xlsx', '.xls']:
+            # Use ExcelLoader for Excel files
+            from llama_index.readers.file.excel import ExcelLoader
+            loader = ExcelLoader(file_path)
+            return loader.load_data()
+
+        elif file_ext == '.csv':
+            # Use PandasCSVReader for CSV files
+            try:
+                from llama_index.readers.file.csv import PandasCSVReader
+                from llama_index.core import SimpleDirectoryReader
+
+                directory = os.path.dirname(file_path) or "."
+                filename = os.path.basename(file_path)
+
+                parser = PandasCSVReader()
+                file_extractor = {".csv": parser}
+                return SimpleDirectoryReader(
+                    input_dir=directory,
+                    input_files=[filename],
+                    file_extractor=file_extractor
+                ).load_data()
+
+            except Exception as e:
+                # Fall back to basic CSVReader
+                from llama_index.readers.file.csv import CSVReader
+                from llama_index.core import SimpleDirectoryReader
+
+                directory = os.path.dirname(file_path) or "."
+                filename = os.path.basename(file_path)
+
+                parser = CSVReader()
+                file_extractor = {".csv": parser}
+                return SimpleDirectoryReader(
+                    input_dir=directory,
+                    input_files=[filename],
+                    file_extractor=file_extractor
+                ).load_data()
+        else:
+            raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")
+
+    except Exception as e:
+        import sys
+        import traceback
+
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        error_details = traceback.format_exception(exc_type, exc_value, exc_traceback)
+
+        raise ValueError(f"Error processing file {file_path}: {str(e)}\nDetails: {''.join(error_details)}")
+
+
+# Create a function tool for CSV/Excel reading
+csv_excel_reader_tool = FunctionTool.from_defaults(
+    name="csv_excel_reader",
+    description="Reads CSV or Excel files and returns them as Document objects. Uses ExcelLoader for Excel files and PandasCSVReader for CSV files.",
+    fn=csv_excel_reader
+)
tools/multimedia_tools.py CHANGED
@@ -2,7 +2,31 @@ import os
 from typing import Optional, Dict, Any
 from llama_index.readers.whisper import WhisperReader
 from llama_index.core.tools import FunctionTool
-
+from llama_index.core import SimpleDirectoryReader
+from llama_index.readers.file import (
+    DocxReader,
+    HWPReader,
+    PDFReader,
+    EpubReader,
+    FlatReader,
+    HTMLTagReader,
+    ImageCaptionReader,
+    ImageReader,
+    ImageVisionLLMReader,
+    IPYNBReader,
+    MarkdownReader,
+    MboxReader,
+    PptxReader,
+    PandasCSVReader,
+    VideoAudioReader,
+    UnstructuredReader,
+    PyMuPDFReader,
+    ImageTabularChartReader,
+    XMLReader,
+    PagedCSVReader,
+    CSVReader,
+    RTFReader,
+)
 
 class WhisperTranscriber:
     """Class for transcribing audio using OpenAI's Whisper model."""