hgmiya committed on
Commit
aa3be20
·
1 Parent(s): 81917a3

Implement GAIA Solver with enhanced agent capabilities and tool integration

Browse files

- Added Google ADK agents for code execution, search, and data analysis.
- Integrated YouTube video analysis and image understanding tools.
- Developed audio transcription and Excel to CSV conversion functionalities.
- Established asynchronous agent call mechanism for improved performance.
- Configured environment variable loading for API keys.
- Created a structured approach for handling user queries and file inputs.
- Enhanced error handling and logging throughout the application.
- Updated requirements.txt to include necessary libraries.
- Added .gitignore to exclude unnecessary files and directories.

Files changed (6) hide show
  1. .gitignore +147 -0
  2. __init__.py +1 -0
  3. agent.py +530 -0
  4. app.py +489 -16
  5. excel_test.py +52 -0
  6. requirements.txt +4 -1
.gitignore ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # PEP 582; used by PDM, PEP 582 compatible tools and project workflow
88
+ __pypackages__/
89
+
90
+ # Celery stuff
91
+ celerybeat-schedule
92
+ celerybeat.pid
93
+
94
+ # SageMath parsed files
95
+ *.sage.py
96
+
97
+ # Environments
98
+ .env
99
+ .venv
100
+ env/
101
+ venv/
102
+ ENV/
103
+ env.bak/
104
+ venv.bak/
105
+
106
+ # Spyder project settings
107
+ .spyderproject
108
+ .spyproject
109
+
110
+ # Rope project settings
111
+ .ropeproject
112
+
113
+ # mkdocs documentation
114
+ /site
115
+
116
+ # mypy
117
+ .mypy_cache/
118
+ .dmypy.json
119
+ dmypy.json
120
+
121
+ # Pyre type checker
122
+ .pyre/
123
+
124
+ # pytype static analysis results
125
+ .pytype/
126
+
127
+ # Cython debug symbols
128
+ cython_debug/
129
+
130
+ # VS Code settings folder
131
+ .vscode/
132
+
133
+ # IDE specific files (JetBrains, Sublime Text, etc.)
134
+ .idea/
135
+ *.iml
136
+ *.sublime-project
137
+ *.sublime-workspace
138
+
139
+ # OS generated files
140
+ .DS_Store
141
+ Thumbs.db
142
+
143
+ # Sensitive credentials - Add the specific path from your .env file
144
+ /path/to/your/google_cloud_credentials.json
145
+
146
+ # Add any other files or directories specific to your project below
147
+ # e.g., logs/, temp/, data/
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from . import agent
agent.py ADDED
@@ -0,0 +1,530 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+
7
+ import asyncio
8
+
9
+ from google import genai
10
+ from google.adk.agents import Agent
11
+ from google.adk.runners import Runner
12
+ from google.adk.sessions import InMemorySessionService
13
+ from google.genai import types
14
+ from google.adk.tools import agent_tool
15
+ from google.adk.agents import Agent
16
+ from google.adk.tools import google_search, built_in_code_execution
17
+ from google.adk.agents import LlmAgent
18
+
19
+ from openpyxl import load_workbook
20
+
21
+ import warnings
22
+ # Ignore all warnings
23
+ warnings.filterwarnings("ignore")
24
+
25
+ import logging
26
+ logging.basicConfig(level=logging.ERROR)
27
+
28
+ # Load API KEYs
29
+ from dotenv import load_dotenv
30
+ load_dotenv()
31
+ GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
32
+
33
+
34
+ # Agent Tools
35
# Sub-agent that turns a mathematical expression into Python code, runs it with
# the built-in code execution tool, and reports only the numeric result.
coding_agent = LlmAgent(
    model='gemini-2.0-flash',
    name='CodeAgent',
    instruction="""You are a calculator agent.
    When given a mathematical expression, write and execute Python code to calculate the result.
    Return only the final numerical result as plain text, without markdown or code blocks.
    """,
    description="Executes Python code to perform calculations.",
    tools=[built_in_code_execution],
)

# Sub-agent that executes Python code handed to it verbatim (it is instructed
# not to modify the code).
code_execution_agent = LlmAgent(
    model='gemini-2.0-flash',
    # The name must differ from coding_agent's: both agents are wrapped with
    # AgentTool on the same root agent in main(), and two tools sharing one
    # name cannot be told apart by the model.
    name='CodeExecutionAgent',
    instruction="""
    You're a specialist in Code Execution. Execute Python code to get the result.
    Return only the final numerical result as plain text, without markdown or code blocks.

    If you given the python code, do not add, subtract any codes from original one.
    """,
    description="Executes Python code. It will not generate code.",
    tools=[built_in_code_execution],
)

# Sub-agent that answers questions through Google Search.
search_agent = Agent(
    name="basic_search_agent",
    model="gemini-2.0-flash",
    description="Agent to answer questions using Google Search.",
    instruction="I can answer your questions by searching the internet. Just ask me anything!",
    # google_search is a pre-built tool which allows the agent to perform Google searches.
    tools=[google_search],
)
67
+
68
+
69
+ # YouTube Tools
70
def understand_youtube_video(video_url: str, question: str) -> str:
    """
    Given a YouTube video URL and question, this will use the Gemini API to analyze the video content and provide an answer.

    Args:
        video_url (str): The URL of the YouTube video you want to analyze (e.g. "https://www.youtube.com/watch?v=...").
                         If Gemini cannot handle this directly, you may need a different format, such as a GCS URI.
        question (str): The specific question about the video content.

    Returns:
        str: The answer generated by the Gemini model based on the video and question.
             Returns an error message if processing fails.

    """
    print("--- Analyzing YouTube Video ---")
    print(f"URL: {video_url}")
    print(f"Question: {question}")

    try:
        client = genai.Client(api_key=GOOGLE_API_KEY)

        # The video is passed by reference (file_uri); the question travels as
        # a second text part of the same content.
        response = client.models.generate_content(
            model='models/gemini-2.0-flash',
            contents=types.Content(
                parts=[
                    types.Part(
                        file_data=types.FileData(file_uri=video_url)
                    ),
                    types.Part(text=question),
                ]
            ),
        )

        print("--- Gemini Response Received ---")
        # Check the *value* of .text, not just that the attribute exists:
        # otherwise a response without text would be returned as None and the
        # fallbacks below would never run.
        text = getattr(response, 'text', None)
        if text:
            return text

        parts = getattr(response, 'parts', None)
        if parts:
            joined = "".join(part.text for part in parts if getattr(part, 'text', None))
            if joined:
                return joined

        block_reason = ""
        feedback = getattr(response, 'prompt_feedback', None)
        if feedback and feedback.block_reason:
            block_reason = f" Reason: {feedback.block_reason.name}"
        return f"Model did not return text content.{block_reason}"
    except Exception as e:
        # Tool functions report failures as text so the calling agent can react
        # instead of aborting the whole run.
        print(f"Error processing YouTube video '{video_url}' with Gemini: {e}")
        return f"Sorry, an error occurred while analyzing the video. Please check the URL and ensure the video is accessible. Error details: {str(e)}"
117
+
118
+
119
+ # Image Tools
120
def understand_image(image_file_name: str) -> str:
    """
    Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.

    Args:
        image_file_name (str): The file name of the image to analyze. Which given as "file_name" parameter in the question.
                               An http(s) URL is downloaded as-is; anything else is looked up in ./GAIA_resource/.

    Returns:
        str: The response text generated by the Gemini model, or an error message if processing fails.
    """
    import mimetypes

    # Decide URL vs. local file BEFORE prefixing the resource directory;
    # joining first would turn a URL into a bogus local path.
    is_url = image_file_name.startswith(("http://", "https://"))
    image_url = image_file_name if is_url else os.path.join("./GAIA_resource/", image_file_name)
    print("--- Analyzing Image ---")
    print(f"Image URL/Path: {image_url}")

    prompt = """
    Analyze the image in detail and describe its contents in as much detail as possible.
    For example, give someone a chess board and describe where each piece is.

    The description should include the following information:
    - General overview of the image
    - Details of important elements and features (e.g., location relationships, attributes, etc.)
    - Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)

    # Steps
    1. Examine the image as a whole and identify the main elements.
    2. Examine each element in detail and identify what it is.
    3. Develop a description of each element based on its characteristic relationships and positions.
    4. Finally, summarize the overall scene or situation.

    # Output Format
    Provide detailed descriptions in paragraphs of text, using bullet points where necessary.

    """

    try:
        # Fetch the image data
        if is_url:
            download = requests.get(image_url, timeout=30)
            download.raise_for_status()  # do not send an HTML error page as "image"
            image_bytes = download.content
        else:
            with open(image_url, "rb") as f:
                image_bytes = f.read()

        # Derive the MIME type from the file extension; fall back to JPEG (the
        # previously hard-coded value) when the extension is unknown.
        guessed_type, _ = mimetypes.guess_type(image_url)
        if not guessed_type or not guessed_type.startswith("image/"):
            guessed_type = "image/jpeg"

        image_part = types.Part.from_bytes(
            data=image_bytes,
            mime_type=guessed_type,
        )

        # Initialize the Gemini client and send prompt + image together
        client = genai.Client(api_key=GOOGLE_API_KEY)
        response = client.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=[
                prompt,
                image_part,
            ],
        )

        print("--- Gemini Response Received ---")
        # Check the value of .text rather than its mere existence so that an
        # empty response reaches the fallbacks instead of returning None.
        text = getattr(response, 'text', None)
        if text:
            return text

        parts = getattr(response, 'parts', None)
        if parts:
            joined = "".join(part.text for part in parts if getattr(part, 'text', None))
            if joined:
                return joined

        block_reason = ""
        feedback = getattr(response, 'prompt_feedback', None)
        if feedback and feedback.block_reason:
            block_reason = f" Reason: {feedback.block_reason.name}"
        return f"Model did not return text content.{block_reason}"

    except Exception as e:
        print(f"Error processing image '{image_url}' with Gemini: {e}")
        return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
194
+
195
+ # Audio Tool
196
def transcribe_audio(audio_path: str) -> str:
    """
    Given an audio file name, uploads the file to Gemini API and generates a speech transcript.

    Args:
        audio_path (str): The file name of the audio to transcribe. It is looked up inside ./GAIA_resource/.

    Returns:
        str: A Markdown-formatted transcript of the speech, or an error message.
    """
    print("--- Transcribing Audio ---")
    print(f"Audio Path: {audio_path}")
    audio_path = os.path.join("./GAIA_resource/", audio_path)

    try:
        # Fail early with a clear message instead of a less specific upload error.
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        # Initialize Gemini client
        client = genai.Client(api_key=GOOGLE_API_KEY)
        # Upload the audio file
        uploaded = client.files.upload(file=audio_path)
        prompt = "Generate a transcript of the speech."

        # Generate transcript
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt, uploaded]
        )

        print("--- Gemini Response Received ---")
        # Use the value of .text (not just attribute presence) so an empty
        # response does not put the literal "None" into the transcript.
        transcript = getattr(response, 'text', None)
        if not transcript:
            parts = getattr(response, 'parts', None) or []
            transcript = "".join(part.text for part in parts if getattr(part, 'text', None))
        if not transcript:
            transcript = "Model did not return text content."

        # Format as Markdown
        markdown_transcript = (
            "## Audio Transcription Result\n"
            f"**Transcript:**\n{transcript}"
        )
        return markdown_transcript

    except Exception as e:
        error_msg = f"Error transcribing audio '{audio_path}': {str(e)}"
        print(error_msg)
        return f"**Error:** {error_msg}"
242
+
243
+
244
+ # Excel Tool
245
def excel_to_csv(excel_path: str) -> str:
    """
    Given an Excel file name or URL, reads the active worksheet using openpyxl
    and returns its contents as CSV text.

    Args:
        excel_path (str): An http(s) URL, or the file name of the Excel file to convert.
                          File names are looked up inside ./GAIA_resource/.

    Returns:
        str: The CSV-formatted content of the active sheet, or an error message on failure.
    """
    import csv
    from io import BytesIO, StringIO

    print("--- Converting Excel to CSV ---")
    print(f"Excel Path: {excel_path}")

    def _csv_line(cells):
        """Format one row as a CSV line, quoting cells that contain commas, quotes or newlines."""
        buffer = StringIO()
        csv.writer(buffer, lineterminator="").writerow(cells)
        return buffer.getvalue()

    try:
        # Decide URL vs. local file BEFORE prefixing the resource directory;
        # joining first would make the URL branch unreachable.
        if excel_path.startswith(("http://", "https://")):
            response = requests.get(excel_path, timeout=30)
            response.raise_for_status()
            wb = load_workbook(filename=BytesIO(response.content), data_only=True)
        else:
            local_path = os.path.join("./GAIA_resource/", excel_path)
            if not os.path.isfile(local_path):
                raise FileNotFoundError(f"Excel file not found: {local_path}")
            wb = load_workbook(filename=local_path, data_only=True)

        # Only the active worksheet is converted.
        ws = wb.active

        # Empty cells become empty strings; every other value is stringified.
        lines = [
            _csv_line(["" if cell is None else str(cell) for cell in row])
            for row in ws.iter_rows(values_only=True)
        ]

        print("Converted Excel to CSV result : ", lines)
        return "\n".join(lines)

    except Exception as e:
        return f"Error converting Excel to CSV: {e}"
288
+
289
# Sub-agent for tabular data questions. Its only tool is excel_to_csv, which
# gives it the spreadsheet contents as CSV text to analyze.
data_analyzer_agent = LlmAgent(
    name="data_analyzer_agent",
    model="gemini-2.5-flash-preview-04-17",
    description="When data is provided, analyze it and derive an appropriate answer.",
    tools=[excel_to_csv],  # The plain function is passed directly as a tool
    instruction="""
    # Steps
    1. **Data Review**: Understand the data provided and understand what it shows.
    2. **Prepare for Analysis**: If necessary, clean the data and prepare it for analysis.
    3. **Data Analysis**: Analyze the data using appropriate methods to find meaningful information and trends.
    4. **Interpretation**: Interpret the analysis results to answer questions and doubts.
    5. **Present Conclusions**: Present your conclusions and insights in a logical summary.

    # Output Format
    - State your conclusions in a short sentence, but make sure they are clear and specific.
    - If necessary, use tables and graphs to provide additional information.

    # Examples
    - **Input Data**:
    - Survey data on age, gender, occupation, and annual income
    - **Analysis Results**:
    - The older the person, the higher the annual income tends to be.
    - **Statement of conclusion**:
    - "The survey data shows that the older you are, the higher your average annual income is."

    # Notes
    - If your data set is very large, consider using sample data or segmenting your data for analysis.
    - Distinguish between qualitative and quantitative data and choose the appropriate analysis method for each.
    """,
)
319
+
320
+
321
+ # Read file ascii
322
def read_file_ascii(file_path: str) -> str:
    """
    Given a file URL or file name, reads the file content and returns it as an ASCII string.

    Args:
        file_path (str): An http(s) URL, or the name of the file to read.
                         File names are looked up inside ./GAIA_resource/.

    Returns:
        str: The ASCII-decoded content of the file (undecodable bytes are replaced),
             or an error message on failure.
    """
    print("File Path : ", file_path)

    try:
        # Decide URL vs. local file BEFORE prefixing the resource directory;
        # joining first would make the URL branch unreachable.
        if file_path.startswith(("http://", "https://")):
            response = requests.get(file_path, timeout=30)
            response.raise_for_status()
            data_bytes = response.content
        else:
            local_path = os.path.join("./GAIA_resource/", file_path)
            with open(local_path, "rb") as f:
                data_bytes = f.read()

        # Decode bytes to ASCII string, replacing errors
        return data_bytes.decode("ascii", errors="replace")

    except Exception as e:
        return f"Error reading file as ASCII: {e}"
351
+
352
+
353
+ # Call Agent Async
354
async def call_agent_async(query: str, runner, user_id, session_id):
    """Send a query to the agent and return the text of its final response.

    Args:
        query: The user question to send.
        runner: Object whose ``run_async(user_id=..., session_id=..., new_message=...)``
            yields the events of one agent turn.
        user_id: User identifier forwarded to the runner.
        session_id: Session identifier forwarded to the runner.

    Returns:
        The final response text, an escalation notice, or the default
        "Agent did not produce a final response." when no final text was found.
    """
    print(f"\n>>> User Query: {query}")

    # Prepare the user's message in ADK format
    content = types.Content(role='user', parts=[types.Part(text=query)])

    final_response_text = "Agent did not produce a final response."  # Default

    # run_async executes the agent logic and yields events; iterate until the
    # event that is flagged as the final response of the turn.
    async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=content):
        if not event.is_final_response():
            continue

        if event.content and event.content.parts:
            # Take the first part that actually carries text. The first part is
            # not guaranteed to be a text part, and returning its None text
            # would silently replace the default message with None.
            for part in event.content.parts:
                if getattr(part, 'text', None):
                    final_response_text = part.text
                    break
        elif event.actions and event.actions.escalate:  # Handle potential errors/escalations
            final_response_text = f"Agent escalated: {event.error_message or 'No specific message.'}"
        break  # Stop processing events once the final response is found

    print(f"<<< Agent Response: {final_response_text}")
    return final_response_text
378
+
379
+
380
# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# --- Agent Prompts ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------

# Short role description passed as the root agent's `description`.
description_text = """
You are GAIA Solver, a highly capable AI assistant designed to answer questions from the GAIA benchmark accurately and concisely using a suite of available tools. Your goal is to provide the precise answer in the requested format based *only* on the provided question text.
"""

# Step-by-step operating instructions passed as the root agent's `instruction`.
# Tool names mentioned here must match tools that are actually registered on
# the root agent (`basic_search_agent`, `CodeAgent`).
instruction_text = """

Thinking Process:
1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use `basic_search_agent`, different tool prompts).
5.  **Synthesize Answer:** Combine information. Use `CodeAgent` for final formatting/calculations.
6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."

Constraints:
- Base actions *only* on the provided question text.
- Adhere strictly to the requested output format.
"""
416
+
417
+
418
async def main(only_question=12):
    """Fetch the GAIA questions, run the root agent on them and collect the answers.

    Submission to the scoring endpoint is not performed here; the function only
    builds the answers payload and a results log.

    Args:
        only_question: 1-based position of the single question to run
            (defaults to 12, the previously hard-coded value). Pass ``None``
            to run every fetched question.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame of the per-question log, or ``None`` when the run failed
        before any question was attempted.
    """
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"

    # 1. Instantiate Agent ( modify this part to create your agent)
    try:
        root_agent = Agent(
            name="root_agent",
            model="gemini-2.5-pro-preview-03-25",
            description=description_text,
            instruction=instruction_text,
            tools=[
                agent_tool.AgentTool(agent=search_agent),
                agent_tool.AgentTool(agent=coding_agent),
                agent_tool.AgentTool(agent=code_execution_agent),
                understand_youtube_video,
                understand_image,
                transcribe_audio,
                agent_tool.AgentTool(agent=data_analyzer_agent),
                read_file_ascii,
            ],
        )
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError must be handled BEFORE RequestException: it is a subclass
    # of RequestException, so the reverse order makes this handler unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for position, item in enumerate(questions_data, start=1):
        if only_question is not None and position != only_question:
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")
        # Validate before building the prompt: concatenating a missing
        # question (None) would raise TypeError outside the try block below.
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        # The file name is appended so the agent can pass it to file tools;
        # a missing file name is treated as an empty string.
        question_file_name = item.get("file_name") or ""
        question_all = question_text + " file_name = " + question_file_name

        try:
            APP_NAME = "gaia_agent"
            USER_ID = "user_1"
            SESSION_ID = task_id

            # A fresh in-memory session per task keeps questions isolated.
            session_service = InMemorySessionService()
            created = session_service.create_session(
                app_name=APP_NAME,
                user_id=USER_ID,
                session_id=SESSION_ID,
            )
            # NOTE(review): create_session is assumed to be a coroutine in some
            # ADK releases; awaiting only when needed supports both - confirm
            # against the installed version.
            if inspect.isawaitable(created):
                await created

            runner = Runner(
                agent=root_agent,                 # The agent we want to run
                app_name=APP_NAME,                # Associates runs with our app
                session_service=session_service,  # Uses our session manager
            )
            submitted_answer = await call_agent_async(
                question_all,
                runner=runner,
                user_id=USER_ID,
                session_id=SESSION_ID,
            )

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    # TODO: submission to f"{api_url}/submit" is not implemented in this script.
    return f"Agent finished with {len(answers_payload)} answer(s).", pd.DataFrame(results_log)
523
# Execution starts here when the script is run directly.
if __name__ == "__main__":
    # main() is declared with `async def`, so calling it only creates a
    # coroutine; asyncio.run() is what actually executes it.
    try:
        asyncio.run(main())
    except Exception as run_error:
        print(f"An error occurred during the asyncio run: {run_error}")
app.py CHANGED
@@ -4,22 +4,444 @@ import requests
4
  import inspect
5
  import pandas as pd
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
@@ -38,9 +460,32 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
 
 
 
 
 
 
 
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -76,11 +521,33 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
@@ -90,6 +557,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
93
 
94
  # 4. Prepare Submission
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -145,11 +613,15 @@ with gr.Blocks() as demo:
145
  gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
 
 
 
 
 
148
  **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
 
154
  ---
155
  **Disclaimers:**
@@ -193,4 +665,5 @@ if __name__ == "__main__":
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
 
4
  import inspect
5
  import pandas as pd
6
 
7
+ import asyncio
8
+
9
+ from google import genai
10
+ from google.adk.agents import Agent
11
+ from google.adk.runners import Runner
12
+ from google.adk.sessions import InMemorySessionService
13
+ from google.genai import types
14
+ from google.adk.tools import agent_tool
15
+ from google.adk.agents import Agent
16
+ from google.adk.tools import google_search, built_in_code_execution
17
+ from google.adk.agents import LlmAgent
18
+
19
+ from openpyxl import load_workbook
20
+
21
+ import warnings
22
+ # Ignore all warnings
23
+ warnings.filterwarnings("ignore")
24
+
25
+ import logging
26
+ logging.basicConfig(level=logging.ERROR)
27
+
28
+
29
# Load API keys.
# Bind the value to a module-level name: the Gemini tool functions in this
# file pass GOOGLE_API_KEY to genai.Client(api_key=...). The previous bare
# os.getenv(...) call discarded its result, so the name was never assigned.
# The value is None when the environment variable is not set.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
31
+
32
+
33
+ # Agent Tools
34
# Calculator-style sub-agent: its prompt asks the model to write and execute
# Python for a mathematical expression and to reply with only the numeric
# result. It is given ADK's built-in code execution tool.
coding_agent = LlmAgent(
    model='gemini-2.0-flash',
    name='CodeAgent',
    instruction="""You are a calculator agent.
    When given a mathematical expression, write and execute Python code to calculate the result.
    Return only the final numerical result as plain text, without markdown or code blocks.
    """,
    description="Executes Python code to perform calculations.",
    tools=[built_in_code_execution],
)
44
+
45
# Sub-agent that executes Python code it is handed (without rewriting it) and
# replies with only the final result.
# The agent name must be unique among the tools registered on the root agent:
# this agent and coding_agent are both wrapped in AgentTool there, and
# coding_agent already uses the name 'CodeAgent'.
code_execution_agent = LlmAgent(
    model='gemini-2.0-flash',
    name='CodeExecutionAgent',
    instruction="""
    You're a specialist in Code Execution. Execute Python code to get the result.
    Return only the final numerical result as plain text, without markdown or code blocks.

    If you given the python code, do not add, subtract any codes from original one.
    """,
    description="Executes Python code. It will not generate code.",
    tools=[built_in_code_execution],
)
57
+
58
# Sub-agent that answers questions through Google Search.
search_agent = Agent(
    name="basic_search_agent",
    model="gemini-2.0-flash",
    description="Agent to answer questions using Google Search.",
    instruction="I can answer your questions by searching the internet. Just ask me anything!",
    # google_search is a pre-built tool which allows the agent to perform Google searches.
    tools=[google_search]
)
66
+
67
+
68
+ # YouTube Tools
69
def understand_youtube_video(video_url: str, question: str) -> str:
    """
    Given a YouTube video URL and question, this will use the Gemini API to analyze the video content and provide an answer.

    Args:
        video_url (str): The URL of the YouTube video you want to analyze (e.g. "https://www.youtube.com/watch?v=...").
                         If Gemini cannot handle this directly, you may need a different format, such as a GCS URI.
        question (str): The specific question about the video content.

    Returns:
        str: The answer generated by the Gemini model based on the video and question.
             Returns an error message if processing fails or the model returns no text.
    """
    print("--- Analyzing YouTube Video ---")
    print(f"URL: {video_url}")
    print(f"Question: {question}")

    try:
        # Read the key from the environment at call time so the tool does not
        # depend on a module-level variable having been assigned.
        client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))

        # The video is passed by URI as a file_data part, next to the question text.
        response = client.models.generate_content(
            model='models/gemini-2.0-flash',
            contents=types.Content(
                parts=[
                    types.Part(
                        file_data=types.FileData(file_uri=video_url)
                    ),
                    types.Part(text=question),
                ]
            ),
        )

        print("--- Gemini Response Received ---")
        # Test the value, not just the attribute's existence: an attribute
        # that exists but holds no text must fall through to the fallbacks.
        text = getattr(response, 'text', None)
        if text:
            return text

        parts = getattr(response, 'parts', None)
        if parts:
            return "".join(part.text for part in parts if getattr(part, 'text', None))

        # No text at all: surface the block reason when the API provides one.
        block_reason = ""
        feedback = getattr(response, 'prompt_feedback', None)
        if feedback and feedback.block_reason:
            block_reason = f" Reason: {feedback.block_reason.name}"
        return f"Model did not return text content.{block_reason}"
    except Exception as e:
        # Tool functions report failures as text so the calling agent can react.
        print(f"Error processing YouTube video '{video_url}' with Gemini: {e}")
        return f"Sorry, an error occurred while analyzing the video. Please check the URL and ensure the video is accessible. Error details: {str(e)}"
116
+
117
+
118
+ # Image Tools
119
def understand_image(image_file_name: str) -> str:
    """
    Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.

    Args:
        image_file_name (str): The file name of the image to analyze. Which given as "file_name" parameter in the question.
                               An http(s) URL is also accepted and is fetched directly.

    Returns:
        str: The response text generated by the Gemini model, or an error message if processing fails.
    """
    import mimetypes

    # Decide between URL and local file BEFORE prefixing the resource
    # directory; once prefixed, the value can never start with "http".
    if image_file_name.startswith("http"):
        image_url = image_file_name
    else:
        image_url = os.path.join("./GAIA_resource/", image_file_name)
        if not os.path.exists(image_url):
            # download_gaia_validation() stores files under
            # GAIA_resource/2023/validation/, so look there as well.
            nested_path = os.path.join("./GAIA_resource/", "2023/validation", image_file_name)
            if os.path.exists(nested_path):
                image_url = nested_path

    print("--- Analyzing Image ---")
    print(f"Image URL/Path: {image_url}")

    prompt = """
    Analyze the image in detail and describe its contents in as much detail as possible.
    For example, give someone a chess board and describe where each piece is.

    The description should include the following information:
    - General overview of the image
    - Details of important elements and features (e.g., location relationships, attributes, etc.)
    - Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)

    # Steps
    1. Examine the image as a whole and identify the main elements.
    2. Examine each element in detail and identify what it is.
    3. Develop a description of each element based on its characteristic relationships and positions.
    4. Finally, summarize the overall scene or situation.

    # Output Format
    Provide detailed descriptions in paragraphs of text, using bullet points where necessary.

    """

    try:
        # Fetch the image data
        if image_url.startswith("http"):
            http_response = requests.get(image_url)
            # Fail on HTTP errors instead of sending an error page as image bytes.
            http_response.raise_for_status()
            image_bytes = http_response.content
        else:
            with open(image_url, "rb") as f:
                image_bytes = f.read()

        # Derive the MIME type from the file extension; fall back to JPEG
        # (the previously hard-coded value) when it cannot be guessed.
        mime_type = mimetypes.guess_type(image_url)[0] or "image/jpeg"

        # Create image part
        image_part = types.Part.from_bytes(
            data=image_bytes,
            mime_type=mime_type,
        )

        # Initialize the Gemini client; the key is read from the environment
        # at call time so no module-level variable is required.
        client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
        # Build contents with the prompt text and the image part
        response = client.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=[
                prompt,
                image_part,
            ],
        )

        print("--- Gemini Response Received ---")
        # Extract text from the response; test the value rather than the
        # attribute's existence so an empty result reaches the fallbacks.
        text = getattr(response, 'text', None)
        if text:
            return text

        parts = getattr(response, 'parts', None)
        if parts:
            return "".join(part.text for part in parts if getattr(part, 'text', None))

        block_reason = ""
        feedback = getattr(response, 'prompt_feedback', None)
        if feedback and feedback.block_reason:
            block_reason = f" Reason: {feedback.block_reason.name}"
        return f"Model did not return text content.{block_reason}"

    except Exception as e:
        # Tool functions report failures as text so the calling agent can react.
        print(f"Error processing image '{image_url}' with Gemini: {e}")
        return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
193
+
194
+ # Audio Tool
195
def transcribe_audio(audio_path: str) -> str:
    """
    Given an audio file name, uploads the file to Gemini API and generates a speech transcript.

    Args:
        audio_path (str): The file name of the audio to transcribe, as given by the "file_name"
                          parameter in the question. It is looked up under ./GAIA_resource/ and
                          then under ./GAIA_resource/2023/validation/.

    Returns:
        str: A Markdown-formatted transcript of the speech, or an error message.
    """
    print("--- Transcribing Audio ---")
    print(f"Audio Path: {audio_path}")

    try:
        local_path = os.path.join("./GAIA_resource/", audio_path)
        if not os.path.exists(local_path):
            # download_gaia_validation() stores files under
            # GAIA_resource/2023/validation/, so look there as well.
            nested_path = os.path.join("./GAIA_resource/", "2023/validation", audio_path)
            if os.path.exists(nested_path):
                local_path = nested_path
        audio_path = local_path

        # Initialize Gemini client; the key is read from the environment at
        # call time so no module-level variable is required.
        client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
        # Upload the audio file
        uploaded = client.files.upload(file=audio_path)
        prompt = "Generate a transcript of the speech."

        # Generate transcript
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[prompt, uploaded],
        )

        print("--- Gemini Response Received ---")
        # Extract transcript text; test the value rather than the attribute's
        # existence so an empty result reaches the fallbacks.
        transcript = getattr(response, 'text', None)
        if not transcript:
            parts = getattr(response, 'parts', None)
            if parts:
                transcript = "".join(part.text for part in parts if getattr(part, 'text', None))
        if not transcript:
            transcript = "Model did not return text content."

        # Format as Markdown
        return (
            "## Audio Transcription Result\n"
            f"**Transcript:**\n{transcript}"
        )

    except Exception as e:
        # Tool functions report failures as text so the calling agent can react.
        error_msg = f"Error transcribing audio '{audio_path}': {str(e)}"
        return f"**Error:** {error_msg}"
241
+
242
+
243
+ # Excel Tool
244
def excel_to_csv(excel_path: str) -> str:
    """
    Given an Excel file name or URL, reads the active worksheet using openpyxl
    and returns its contents as CSV text.

    Args:
        excel_path (str): The URL or file name of the Excel file to convert. A file name is
                          looked up under ./GAIA_resource/ and then under
                          ./GAIA_resource/2023/validation/.

    Returns:
        str: The CSV-formatted content of the sheet, or an error message on failure.
    """
    # Local imports keep the block self-contained: BytesIO is used by the URL
    # branch, and csv provides correct quoting.
    import csv
    from io import BytesIO, StringIO

    print("--- Converting Excel to CSV ---")
    print(f"Excel Path: {excel_path}")

    try:
        # Load workbook from URL or local file. The URL test must run on the
        # raw argument: once the resource directory is prefixed, the path can
        # never start with "http".
        if excel_path.startswith("http"):
            response = requests.get(excel_path)
            response.raise_for_status()
            wb = load_workbook(filename=BytesIO(response.content), data_only=True)
        else:
            local_path = os.path.join("./GAIA_resource/", excel_path)
            if not os.path.exists(local_path):
                # download_gaia_validation() stores files under
                # GAIA_resource/2023/validation/, so look there as well.
                nested_path = os.path.join("./GAIA_resource/", "2023/validation", excel_path)
                if os.path.exists(nested_path):
                    local_path = nested_path
            wb = load_workbook(filename=local_path, data_only=True)

        # Select worksheet
        ws = wb.active

        # Let the csv module do the quoting so cells containing commas,
        # quotes or line breaks do not corrupt the column layout.
        buffer = StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        for row in ws.iter_rows(values_only=True):
            # Convert each cell to string, using empty string for None
            writer.writerow(["" if cell is None else str(cell) for cell in row])

        csv_text = buffer.getvalue()
        # Rows are newline-separated, without a trailing newline.
        if csv_text.endswith("\n"):
            csv_text = csv_text[:-1]

        print("Converted Excel to CSV result : ", csv_text)
        return csv_text

    except Exception as e:
        # Tool functions report failures as text so the calling agent can react.
        return f"Error converting Excel to CSV: {e}"
287
+
288
# Sub-agent for data questions: its prompt walks the model through reviewing,
# analyzing and summarizing provided data. excel_to_csv is its only tool, so
# spreadsheet contents reach it as CSV text.
data_analyzer_agent = LlmAgent(
    model="gemini-2.5-flash-preview-04-17",
    name="data_analyzer_agent",
    description="When data is provided, analyze it and derive an appropriate answer.",
    instruction="""
    # Steps
    1. **Data Review**: Understand the data provided and understand what it shows.
    2. **Prepare for Analysis**: If necessary, clean the data and prepare it for analysis.
    3. **Data Analysis**: Analyze the data using appropriate methods to find meaningful information and trends.
    4. **Interpretation**: Interpret the analysis results to answer questions and doubts.
    5. **Present Conclusions**: Present your conclusions and insights in a logical summary.

    # Output Format
    - State your conclusions in a short sentence, but make sure they are clear and specific.
    - If necessary, use tables and graphs to provide additional information.

    # Examples
    - **Input Data**:
    - Survey data on age, gender, occupation, and annual income
    - **Analysis Results**:
    - The older the person, the higher the annual income tends to be.
    - **Statement of conclusion**:
    - "The survey data shows that the older you are, the higher your average annual income is."

    # Notes
    - If your data set is very large, consider using sample data or segmenting your data for analysis.
    - Distinguish between qualitative and quantitative data and choose the appropriate analysis method for each.
    """,
    tools=[excel_to_csv] # Provide the function directly
)
318
+
319
+
320
+ # Read file ascii
321
def read_file_ascii(file_path: str) -> str:
    """
    Given a file URL or file name, reads the file content and returns it as an ASCII string.

    Args:
        file_path (str): The URL or file name of the file to read. A file name is looked up
                         under ./GAIA_resource/ and then under ./GAIA_resource/2023/validation/.

    Returns:
        str: The ASCII-decoded content of the file (undecodable bytes are replaced),
             or an error message on failure.
    """
    print("File Path : ", file_path)

    try:
        # Load data from URL or local file. The URL test must run on the raw
        # argument: once the resource directory is prefixed, the path can
        # never start with "http".
        if file_path.startswith("http"):
            response = requests.get(file_path)
            response.raise_for_status()
            data_bytes = response.content
        else:
            local_path = os.path.join("./GAIA_resource/", file_path)
            if not os.path.exists(local_path):
                # download_gaia_validation() stores files under
                # GAIA_resource/2023/validation/, so look there as well.
                nested_path = os.path.join("./GAIA_resource/", "2023/validation", file_path)
                if os.path.exists(nested_path):
                    local_path = nested_path
            with open(local_path, "rb") as f:
                data_bytes = f.read()

        # Decode bytes to ASCII string, replacing errors
        return data_bytes.decode("ascii", errors="replace")

    except Exception as e:
        # Tool functions report failures as text so the calling agent can react.
        return f"Error reading file as ASCII: {e}"
350
+
351
+
352
+ # Call Agent Async
353
async def call_agent_async(query: str, runner, user_id, session_id):
    """
    Run one query through the given runner and return the final response text.

    Args:
        query (str): The user query to send.
        runner: Object whose ``run_async(...)`` yields events for the turn.
        user_id: User identifier forwarded to ``run_async``.
        session_id: Session identifier forwarded to ``run_async``.

    Returns:
        The text of the first part of the final response event, an
        "Agent escalated: ..." message when that event escalates without
        content, or a default message when neither applies.
    """
    print(f"\n>>> User Query: {query}")

    # Wrap the query as a user-role message in ADK's content format.
    user_message = types.Content(role='user', parts=[types.Part(text=query)])

    # Fallback returned when the final event offers neither content nor an escalation.
    answer = "Agent did not produce a final response."

    event_stream = runner.run_async(
        user_id=user_id,
        session_id=session_id,
        new_message=user_message,
    )
    async for event in event_stream:
        # Only the event flagged as the final response concludes the turn.
        if not event.is_final_response():
            continue

        if event.content and event.content.parts:
            # The text response is assumed to be in the first part.
            answer = event.content.parts[0].text
        elif event.actions and event.actions.escalate:
            answer = f"Agent escalated: {event.error_message or 'No specific message.'}"
        # Stop consuming events once the final response has been seen.
        break

    print(f"<<< Agent Response: {answer}")
    return answer
377
+
378
+
379
  # (Keep Constants as is)
380
  # --- Constants ---
381
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
382
+ # for GAIA Repo
383
+ GAIA_REPO_ID = "gaia-benchmark/GAIA"
384
+ GAIA_VALIDATION_DIR = "2023/validation"
385
+ LOCAL_GAIA_DIR = "GAIA_resource"
386
+
387
+
388
+ # --- GAIA Data Download Utility ---
389
def download_gaia_validation(local_dir: str = LOCAL_GAIA_DIR):
    """
    Download only the validation part of the Hugging Face GAIA dataset to
    local_dir/2023/validation/.
    If it has already been downloaded, it will not be downloaded again.

    Args:
        local_dir (str): Directory that receives the dataset files; created if missing.

    Returns:
        None. Progress is reported with print().
    """
    target_path = os.path.join(local_dir, GAIA_VALIDATION_DIR)
    # A non-empty target directory counts as "already downloaded".
    if os.path.isdir(target_path) and os.listdir(target_path):
        print(f"GAIA validation data already exists at {target_path}")
        return

    # Imported here because no import of snapshot_download is visible among
    # this file's imports; without it the call below raises NameError.
    from huggingface_hub import snapshot_download

    os.makedirs(local_dir, exist_ok=True)
    print(f"Downloading GAIA validation data into {local_dir} ...")
    snapshot_download(
        repo_id=GAIA_REPO_ID,
        repo_type="dataset",
        # Restrict the snapshot to the validation split.
        allow_patterns=[f"{GAIA_VALIDATION_DIR}/*"],
        local_dir=local_dir,
        local_dir_use_symlinks=False
    )
    print(f"Downloaded GAIA validation data to {target_path}")
410
 
411
  # --- Basic Agent Definition ---
412
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
413
+ #class BasicAgent:
414
+ # def __init__(self):
415
+ # print("BasicAgent initialized.")
416
+ # def __call__(self, question: str) -> str:
417
+ # print(f"Agent received question (first 50 chars): {question[:50]}...")
418
+ # #fixed_answer = "This is a default answer."
419
+ # #print(f"Agent returning fixed answer: {fixed_answer}")
420
+ #
421
+ # return fixed_answer
422
+
423
+
424
# Description passed to the root agent (used as its `description`).
description_text = """
You are GAIA Solver, a highly capable AI assistant designed to answer questions from the GAIA benchmark accurately and concisely using a suite of available tools. Your goal is to provide the precise answer in the requested format based *only* on the provided question text.
"""

# System instruction passed to the root agent (used as its `instruction`).
# NOTE(review): the text refers to tools named `web_search` and
# `execute_python_code`, but the tools registered on root_agent are the
# sub-agents (search, code, data analysis) plus understand_youtube_video,
# understand_image, transcribe_audio and read_file_ascii — confirm the model
# maps these names to the registered tools, or align the wording.
instruction_text = """

Thinking Process:
1. **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
2. **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
3. **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
4. **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use `web_search`, different tool prompts).
5. **Synthesize Answer:** Combine information. Use `execute_python_code` for final formatting/calculations.
6. **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."

Constraints:
- Base actions *only* on the provided question text.
- Adhere strictly to the requested output format.
"""
442
+
443
+
444
+ async def run_and_submit_all( profile: gr.OAuthProfile | None):
445
  """
446
  Fetches all questions, runs the BasicAgent on them, submits all answers,
447
  and displays the results.
 
460
  questions_url = f"{api_url}/questions"
461
  submit_url = f"{api_url}/submit"
462
 
463
+ # 0. Download GAIA data
464
+ try:
465
+ download_gaia_validation()
466
+ except Exception as e:
467
+ err = f"Error downloading GAIA validation data: {e}"
468
+ print(err)
469
+ return err, None
470
+
471
  # 1. Instantiate Agent ( modify this part to create your agent)
472
  try:
473
+ root_agent = Agent(
474
+ name = "root_agent",
475
+ model = "gemini-2.5-pro-preview-03-25",
476
+ description = description_text,
477
+ instruction = instruction_text,
478
+ tools = [
479
+ agent_tool.AgentTool(agent=search_agent),
480
+ agent_tool.AgentTool(agent=coding_agent),
481
+ agent_tool.AgentTool(agent=code_execution_agent),
482
+ understand_youtube_video,
483
+ understand_image,
484
+ transcribe_audio,
485
+ agent_tool.AgentTool(agent=data_analyzer_agent),
486
+ read_file_ascii,
487
+ ]
488
+ )
489
  except Exception as e:
490
  print(f"Error instantiating agent: {e}")
491
  return f"Error initializing agent: {e}", None
 
521
  for item in questions_data:
522
  task_id = item.get("task_id")
523
  question_text = item.get("question")
524
+ question_file_name = item.get("file_name")
525
+ question_all = question_text + " file_name = " + question_file_name
526
  if not task_id or question_text is None:
527
  print(f"Skipping item with missing task_id or question: {item}")
528
  continue
529
  try:
530
+ APP_NAME = "gaia_agent"
531
+ USER_ID = "user_1"
532
+ SESSION_ID = item.get("task_id")
533
+
534
+ session_service = InMemorySessionService()
535
+
536
+ session = session_service.create_session(
537
+ app_name=APP_NAME,
538
+ user_id=USER_ID,
539
+ session_id=SESSION_ID
540
+ )
541
+ runner = Runner(
542
+ agent=root_agent, # The agent we want to run
543
+ app_name=APP_NAME, # Associates runs with our app
544
+ session_service=session_service # Uses our session manager
545
+ )
546
+ submitted_answer = await call_agent_async(question_all,
547
+ runner=runner,
548
+ user_id=USER_ID,
549
+ session_id=SESSION_ID)
550
+
551
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
552
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
553
  except Exception as e:
 
557
  if not answers_payload:
558
  print("Agent did not produce any answers to submit.")
559
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
560
+
561
 
562
  # 4. Prepare Submission
563
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
613
  gr.Markdown("# Basic Agent Evaluation Runner")
614
  gr.Markdown(
615
  """
616
+ **Introduction:**
617
+
618
+ This is an agent for GAIA benchmark.
619
+ Built with Google ADK (Agent Development Kit)
620
+
621
  **Instructions:**
622
 
623
+ Log in to your Hugging Face account using the button below. This uses your HF username for submission.
624
+ Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
625
 
626
  ---
627
  **Disclaimers:**
 
665
  print("-"*(60 + len(" App Starting ")) + "\n")
666
 
667
  print("Launching Gradio Interface for Basic Agent Evaluation...")
668
+ demo.launch(debug=True, share=False)
669
+
excel_test.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from openpyxl import load_workbook
4
+
5
+
6
+ # Excel Tool
7
def excel_to_csv(excel_path: str) -> str:
    """
    Given an Excel file name or URL, reads the active worksheet using openpyxl
    and returns its contents as CSV text.

    Args:
        excel_path (str): The URL or file name of the Excel file to convert. A file name is
                          looked up under ./GAIA_resource/ and then under
                          ./GAIA_resource/2023/validation/.

    Returns:
        str: The CSV-formatted content of the sheet, or an error message on failure.
    """
    # Local imports keep the block self-contained: BytesIO is used by the URL
    # branch but is not among this script's imports, and csv provides correct
    # quoting.
    import csv
    from io import BytesIO, StringIO

    print("--- Converting Excel to CSV ---")
    print(f"Excel Path: {excel_path}")

    try:
        # Load workbook from URL or local file. The URL test must run on the
        # raw argument: once the resource directory is prefixed, the path can
        # never start with "http".
        if excel_path.startswith("http"):
            response = requests.get(excel_path)
            response.raise_for_status()
            wb = load_workbook(filename=BytesIO(response.content), data_only=True)
        else:
            local_path = os.path.join("./GAIA_resource/", excel_path)
            if not os.path.exists(local_path):
                # The GAIA validation files may sit in the dataset's own
                # 2023/validation/ sub-directory; look there as well.
                nested_path = os.path.join("./GAIA_resource/", "2023/validation", excel_path)
                if os.path.exists(nested_path):
                    local_path = nested_path
            wb = load_workbook(filename=local_path, data_only=True)

        # Select worksheet
        ws = wb.active

        # Let the csv module do the quoting so cells containing commas,
        # quotes or line breaks do not corrupt the column layout.
        buffer = StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        for row in ws.iter_rows(values_only=True):
            # Convert each cell to string, using empty string for None
            writer.writerow(["" if cell is None else str(cell) for cell in row])

        csv_text = buffer.getvalue()
        # Rows are newline-separated, without a trailing newline.
        if csv_text.endswith("\n"):
            csv_text = csv_text[:-1]

        print("Converted Excel to CSV result : ", csv_text)
        return csv_text

    except Exception as e:
        return f"Error converting Excel to CSV: {e}"
50
+
51
+
52
+ excel_to_csv("7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx")
requirements.txt CHANGED
@@ -1,2 +1,5 @@
1
  gradio
2
- requests
 
 
 
 
1
  gradio
2
+ requests
3
+ google-genai
4
+ google.adk
5
+ openpyxl