drAbreu committed
Commit 41cae26 · 1 Parent(s): f08e4fa

Added code execution, Excel, and audio capabilities to the agent

.DS_Store ADDED
Binary file (6.15 kB)
 
.gitignore CHANGED
@@ -9,4 +9,8 @@ agents/__pycache__
 
 *.pyc
 
-.mypy_cache
+.mypy_cache
+
+testing_implementation.ipynb
+
+.DS_Store
agents/llama_index_agent.py CHANGED
@@ -1,6 +1,7 @@
 from llama_index.core.agent.workflow import (
     ReActAgent,
-    FunctionAgent
+    FunctionAgent,
+    CodeActAgent
 )
 from llama_index.core.llms import LLM
 import os
@@ -16,6 +17,12 @@ from tools.web_tools import (
     tavily_tool,
     wikipedia_tool
 )
+
+from tools.coding_tools import (
+    execute_python_file_tool,
+    csv_excel_reader_tool
+)
+
 class GaiaAgent(ReActAgent):
     """
     A flexible ReActAgent for GAIA benchmark tasks that supports multiple LLM providers.
@@ -64,6 +71,8 @@ class GaiaAgent(ReActAgent):
             wikipedia_tool.search_data,
             tavily_tool.search,
             transcribe_audio_tool,
+            execute_python_file_tool,
+            csv_excel_reader_tool
         ]
 
         # Use default system prompt if not provided
@@ -110,56 +119,6 @@ class GaiaAgent(ReActAgent):
             raise ValueError(f"Unsupported model provider: {model_provider}. "
                              f"Supported providers are: openai, anthropic")
 
-    def _get_default_system_prompt_legacy(self) -> str:
-        """Return the default system prompt for GAIA benchmark tasks."""
-        return """
-        You are the lead coordinator for a team of specialized AI agents tackling the GAIA benchmark. Your job is to analyze each question with extreme precision, determine the exact format required for the answer, break the task into logical steps, and either solve it yourself or delegate to the appropriate specialized agents.
-
-        ## QUESTION ANALYSIS PROCESS
-        1. First, carefully read and parse the entire question
-        2. Identify the EXACT output format required (single word, name, number, comma-separated list, etc.)
-        3. Note any special formatting requirements (alphabetical order, specific notation, etc.)
-        4. Identify what type of task this is (research, audio analysis, video analysis, code execution, data analysis, etc.)
-        5. Break the question into sequential steps
-
-        ## DELEGATION GUIDELINES
-        - video_analyst: Use for all YouTube video analysis, visual content identification, or scene description
-        - audio_analyst: Use for transcribing audio files, identifying speakers, or extracting information from recordings
-        - researcher: Use for factual queries, literature searches, finding specific information in papers or websites
-        - code_analyst: Use for executing, debugging or analyzing code snippets
-        - excel_analyst: Use for analyzing spreadsheets, calculating values, or extracting data from Excel files
-
-        ## CRITICAL RESPONSE RULES
-        - NEVER include explanations in your final answer
-        - NEVER include phrases like "the answer is" or "the result is"
-        - Return EXACTLY what was asked for - no more, no less
-        - If asked for a name, return ONLY the name
-        - If asked for a number, return ONLY the number
-        - If asked for a list, format it EXACTLY as specified (comma-separated, alphabetical, etc.)
-        - Double-check your answer against the exact output requirements before submitting
-
-        ## EXAMPLES OF PROPER RESPONSES:
-        Question: "What is the first name of the scientist who discovered penicillin?"
-        Correct answer: Alexander
-
-        Question: "List the prime numbers between 10 and 20 in ascending order."
-        Correct answer: 11, 13, 17, 19
-
-        Question: "If you understand this sentence, write the opposite of the word 'right' as the answer."
-        Correct answer: left
-
-        Question: "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?"
-        Correct answer: 572
-
-        For questions with reverse text:
-        1. Use your reverse_text_tool to process the text
-        2. Understand the instruction in the reversed text
-        3. Follow the instruction exactly
-
-        After you have the final answer, verify one last time that it meets ALL formatting requirements from the question before submitting.
-
-        IMPORTANT: Your value is in providing PRECISELY what was asked for - not in showing your work or explaining how you got there.
-        """
 
     def _get_default_system_prompt(self) -> str:
         """Return the default system prompt for GAIA benchmark tasks."""
@@ -180,18 +139,32 @@ class GaiaAgent(ReActAgent):
            - search tools (wikipedia_tool, tavily_tool): For finding information
            - transcribe_audio: For transcribing audio files (provide the path to the audio file)
            - get_audio_metadata: For getting metadata about audio files
+           - execute_python_file: For executing Python code files and returning their output
         3. Document your full analysis, including all key facts, calculations, and relevant information
         4. Clearly identify what you believe the correct answer is
         5. Be extremely explicit about the required formatting for the final answer
 
+        ## HANDLING CODE EXECUTION TASKS
+        When dealing with Python code files:
+        1. Check if a Python file path is available in the context's "file_name" field
+        2. Always use the execute_python_file tool with the exact file path provided
+        3. Extract the specific numeric output requested from the execution result
+        4. For code tasks, ensure you've captured the final numeric output exactly as printed by the code
+
         ## HANDLING AUDIO TASKS
         When dealing with audio files:
        1. Check if an audio file path is available in the context's "audio_file_path" field
        2. Always use the transcribe_audio tool with the exact file path provided in the context
        3. Extract the specific information requested from the transcript (e.g., ingredients, page numbers, names)
-        4. Follow any special formatting instructions (e.g., comma-separated list, alphabetical order)
-        5. Make sure to provide exactly what is asked for (e.g., "only list ingredients, not measurements")
-        6. For audio tasks, ensure you've captured all relevant spoken content, including names, facts, or quotes as needed
+        4. For audio tasks, ensure you've captured all relevant spoken content, including names, facts, or quotes as needed
+
+        ## HANDLING CSV OR EXCEL DATA TASKS
+        When dealing with CSV files or data analysis tasks:
+        1. Check if a CSV file path is mentioned in the question or available in the context
+        2. Use the csv_reader tool with the specific CSV file path
+        3. Once the data is loaded, analyze it according to the question requirements
+        4. For data analysis tasks, ensure you've properly processed the CSV data and extracted the requested information
+        5. When calculations or statistics are needed, perform them accurately and document your methodology
 
         ## DELEGATION TO WRITER AGENT
         After completing your analysis, ALWAYS delegate the final answer preparation to the writer_agent with:
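For orientation, the net effect of this change on the agent itself is two extra entries in its default tool list. A minimal sketch of the assembled list after the commit (the surrounding constructor code is paraphrased, not quoted from the diff):

    # Sketch: default tool list inside GaiaAgent after this commit
    tools = [
        wikipedia_tool.search_data,    # Wikipedia search
        tavily_tool.search,            # Tavily web search
        transcribe_audio_tool,         # Whisper transcription
        execute_python_file_tool,      # new: run a .py file and return its stdout
        csv_excel_reader_tool,         # new: load CSV/Excel files as Document objects
    ]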
app.py CHANGED
@@ -103,7 +103,7 @@ class BasicAgent:
         local_file_path = None
         if file_name and task_id:
             try:
-                local_file_path = self.download_task_file(task_id)
+                local_file_path = self.download_task_file(question_data)
                 print(f"Downloaded audio file to {local_file_path}")
             except Exception as e:
                 print(f"Error downloading audio file: {e}")
@@ -144,10 +144,10 @@
         print(f"Agent returning answer: {final_answer}")
         return final_answer
 
-    def download_task_file(self, task_id: str) -> str:
+    def download_task_file(self, question_data: dict) -> str:
         """Download a task file from the API and return the local file path."""
         api_url = DEFAULT_API_URL
-        file_url = f"{api_url}/files/{task_id}"
+        file_url = f"{api_url}/files/{question_data['task_id']}"
 
         print(f"Downloading file from: {file_url}")
 
@@ -160,7 +160,7 @@
             downloads_dir.mkdir(exist_ok=True)
 
             # Save the file to the downloads directory
-            file_path = downloads_dir / f"{task_id}.mp3"
+            file_path = downloads_dir / f"{question_data['file_name']}"
             with open(file_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     f.write(chunk)
@@ -170,6 +170,7 @@
             print(f"Error downloading file: {e}")
             raise
 
+
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
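The download helper now receives the whole question payload because it needs two different fields: task_id to build the URL and file_name to name the saved file (previously every download was hard-coded to <task_id>.mp3). A minimal sketch of the resulting flow, assuming question_data carries both keys and requests/Path are imported as elsewhere in app.py (the payload values and the directory name below are hypothetical):

    # Sketch only: reworked download path
    question_data = {"task_id": "abc123", "file_name": "recipe.mp3"}
    file_url = f"{DEFAULT_API_URL}/files/{question_data['task_id']}"
    downloads_dir = Path("downloads")                          # assumed directory name
    downloads_dir.mkdir(exist_ok=True)
    local_path = downloads_dir / question_data["file_name"]    # keeps the original extension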
requirements.txt CHANGED
@@ -5,4 +5,6 @@ llama-index-tools-wikipedia
 llama-index-tools-tavily-research
 llama-index-llms-anthropic
 llama-index-llms-openai
-llama-index-readers-whisper
+llama-index-readers-whisper
+llama-index-readers-file
+llama-index-readers-pandas-ai
tools/coding_tools.py ADDED
@@ -0,0 +1,161 @@
+import subprocess
+import os
+from typing import Optional, Dict, Any
+from llama_index.core.tools import FunctionTool
+from llama_index.core import SimpleDirectoryReader
+from llama_index.readers.file import (
+    PandasCSVReader,
+    CSVReader,
+)
+
+
+def execute_python_file(file_path: str) -> Dict[str, Any]:
+    """
+    Execute a Python file and return its output.
+
+    Args:
+        file_path: Path to the Python file to execute
+
+    Returns:
+        Dictionary containing the output and execution status
+    """
+    # Check if file exists
+    if not os.path.exists(file_path):
+        return {
+            "success": False,
+            "error": f"File not found at {file_path}",
+            "output": None
+        }
+
+    try:
+        # Execute the Python file and capture output
+        result = subprocess.run(
+            ["python3", file_path],  # Use python3 explicitly
+            capture_output=True,
+            text=True,
+            check=True
+        )
+
+        # Return the stdout output (trimmed of whitespace)
+        return {
+            "success": True,
+            "error": None,
+            "output": result.stdout.strip()
+        }
+    except subprocess.CalledProcessError as e:
+        return {
+            "success": False,
+            "error": f"Execution error: {e}",
+            "stderr": e.stderr,
+            "output": None
+        }
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Error: {str(e)}",
+            "output": None
+        }
+
+
+# Create a function tool for Python file execution
+execute_python_file_tool = FunctionTool.from_defaults(
+    name="execute_python_file",
+    description="Execute a Python file and return its output.",
+    fn=execute_python_file
+)
+
+
+def csv_excel_reader(file_path: str) -> list:
+    """
+    Read and parse CSV or Excel files using LlamaIndex document readers.
+
+    This function determines the file type by extension and uses the appropriate loader:
+    - For Excel files (.xlsx, .xls): Uses ExcelLoader
+    - For CSV files (.csv): Uses PandasCSVReader with fallback to CSVReader
+
+    Args:
+        file_path (str): Path to the CSV or Excel file to be read
+
+    Returns:
+        list: Document objects containing the parsed data from the file
+
+    Raises:
+        FileNotFoundError: If the specified file doesn't exist
+        ValueError: If the file cannot be parsed or has an unsupported extension
+
+    Examples:
+        >>> documents = csv_excel_reader("data/financial_report.csv")
+        >>> print(f"Loaded {len(documents)} documents")
+        >>>
+        >>> # Or with Excel files
+        >>> documents = csv_excel_reader("data/quarterly_reports.xlsx")
+        >>> print(f"Loaded {len(documents)} documents from Excel file")
+    """
+    import os
+
+    # Check if file exists
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found at {file_path}")
+
+    # Get file extension
+    file_ext = os.path.splitext(file_path)[1].lower()
+
+    # Use the appropriate loader based on file extension
+    try:
+        if file_ext in ['.xlsx', '.xls']:
+            # Use ExcelLoader for Excel files
+            from llama_index.readers.file.excel import ExcelLoader
+            loader = ExcelLoader(file_path)
+            return loader.load_data()
+
+        elif file_ext == '.csv':
+            # Use PandasCSVReader for CSV files
+            try:
+                from llama_index.readers.file.csv import PandasCSVReader
+                from llama_index.core import SimpleDirectoryReader
+
+                directory = os.path.dirname(file_path) or "."
+                filename = os.path.basename(file_path)
+
+                parser = PandasCSVReader()
+                file_extractor = {".csv": parser}
+                return SimpleDirectoryReader(
+                    input_dir=directory,
+                    input_files=[filename],
+                    file_extractor=file_extractor
+                ).load_data()
+
+            except Exception as e:
+                # Fall back to basic CSVReader
+                from llama_index.readers.file.csv import CSVReader
+                from llama_index.core import SimpleDirectoryReader
+
+                directory = os.path.dirname(file_path) or "."
+                filename = os.path.basename(file_path)
+
+                parser = CSVReader()
+                file_extractor = {".csv": parser}
+                return SimpleDirectoryReader(
+                    input_dir=directory,
+                    input_files=[filename],
+                    file_extractor=file_extractor
+                ).load_data()
+        else:
+            raise ValueError(f"Unsupported file extension: {file_ext}. Supported extensions are .csv, .xlsx, and .xls")
+
+    except Exception as e:
+        import sys
+        import traceback
+
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        error_details = traceback.format_exception(exc_type, exc_value, exc_traceback)
+
+        raise ValueError(f"Error processing file {file_path}: {str(e)}\nDetails: {''.join(error_details)}")
+
+
+# Create a function tool for CSV/Excel reading
+csv_excel_reader_tool = FunctionTool.from_defaults(
+    name="csv_excel_reader",
+    description="Reads CSV or Excel files and returns them as Document objects. Uses ExcelLoader for Excel files and PandasCSVReader for CSV files.",
+    fn=csv_excel_reader
+)
tools/multimedia_tools.py CHANGED
@@ -2,7 +2,31 @@ import os
 from typing import Optional, Dict, Any
 from llama_index.readers.whisper import WhisperReader
 from llama_index.core.tools import FunctionTool
-
+from llama_index.core import SimpleDirectoryReader
+from llama_index.readers.file import (
+    DocxReader,
+    HWPReader,
+    PDFReader,
+    EpubReader,
+    FlatReader,
+    HTMLTagReader,
+    ImageCaptionReader,
+    ImageReader,
+    ImageVisionLLMReader,
+    IPYNBReader,
+    MarkdownReader,
+    MboxReader,
+    PptxReader,
+    PandasCSVReader,
+    VideoAudioReader,
+    UnstructuredReader,
+    PyMuPDFReader,
+    ImageTabularChartReader,
+    XMLReader,
+    PagedCSVReader,
+    CSVReader,
+    RTFReader,
+)
 
 class WhisperTranscriber:
     """Class for transcribing audio using OpenAI's Whisper model."""