Final_Assignment_1

Running

hgmiya committed 1 day ago

Commit

aa3be20

1 Parent(s): 81917a3

Implement GAIA Solver with enhanced agent capabilities and tool integration

- Added Google ADK agents for code execution, search, and data analysis.
- Integrated YouTube video analysis and image understanding tools.
- Developed audio transcription and Excel to CSV conversion functionalities.
- Established asynchronous agent call mechanism for improved performance.
- Configured environment variable loading for API keys.
- Created a structured approach for handling user queries and file inputs.
- Enhanced error handling and logging throughout the application.
- Updated requirements.txt to include necessary libraries.
- Added .gitignore to exclude unnecessary files and directories.

Files changed (6) hide show

.gitignore +147 -0
__init__.py +1 -0
agent.py +530 -0
app.py +489 -16
excel_test.py +52 -0
requirements.txt +4 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,147 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# PEP 582; used by PDM, PEP 582 compatible tools and project workflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static analysis results
+.pytype/
+# Cython debug symbols
+cython_debug/
+# VS Code settings folder
+.vscode/
+# IDE specific files (JetBrains, Sublime Text, etc.)
+.idea/
+*.iml
+*.sublime-project
+*.sublime-workspace
+# OS generated files
+.DS_Store
+Thumbs.db
+# Sensitive credentials - Add the specific path from your .env file
+/path/to/your/google_cloud_credentials.json
+# Add any other files or directories specific to your project below
+# e.g., logs/, temp/, data/

__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from . import agent

agent.py ADDED Viewed

	@@ -0,0 +1,530 @@

+import os
+import gradio as gr
+import requests
+import inspect
+import pandas as pd
+import asyncio
+from google import genai
+from google.adk.agents import Agent
+from google.adk.runners import Runner
+from google.adk.sessions import InMemorySessionService
+from google.genai import types
+from google.adk.tools import agent_tool
+from google.adk.agents import Agent
+from google.adk.tools import google_search, built_in_code_execution
+from google.adk.agents import LlmAgent
+from openpyxl import load_workbook
+import warnings
+# Ignore all warnings
+warnings.filterwarnings("ignore")
+import logging
+logging.basicConfig(level=logging.ERROR)
+# Load API KEYs
+from dotenv import load_dotenv
+load_dotenv()
+GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
+# Agent Tools
+coding_agent = LlmAgent(
+    model='gemini-2.0-flash',
+    name='CodeAgent',
+    instruction="""You are a calculator agent.
+    When given a mathematical expression, write and execute Python code to calculate the result.
+    Return only the final numerical result as plain text, without markdown or code blocks.
+    """,
+    description="Executes Python code to perform calculations.",
+    tools=[built_in_code_execution],
+)
+code_execution_agent = LlmAgent(
+    model='gemini-2.0-flash',
+    name='CodeAgent',
+    instruction="""
+    You're a specialist in Code Execution. Execute Python code to get the result.
+    Return only the final numerical result as plain text, without markdown or code blocks.
+    If you given the python code, do not add, subtract any codes from original one.
+    """,
+    description="Executes Python code. It will not generate code.",
+    tools=[built_in_code_execution],
+)
+search_agent = Agent(
+    name="basic_search_agent",
+    model="gemini-2.0-flash",
+    description="Agent to answer questions using Google Search.",
+    instruction="I can answer your questions by searching the internet. Just ask me anything!",
+    # google_search is a pre-built tool which allows the agent to perform Google searches.
+    tools=[google_search]
+)
+# YouTube Tools
+def understand_youtube_video(video_url: str, question: str) -> str:
+    """
+    Given a YouTube video URL and question, this will use the Gemini API to analyze the video content and provide an answer.
+    Args:
+    video_url (str): The URL of the YouTube video you want to analyze (e.g. "https://www.youtube.com/watch?v=...").
+    If Gemini cannot handle this directly, you may need a different format, such as a GCS URI.
+    question (str): The specific question about the video content.
+    Returns:
+    str: The answer generated by the Gemini model based on the video and question.
+    Returns an error message if processing fails.
+    """
+    print(f"--- Analyzing YouTube Video ---")
+    print(f"URL: {video_url}")
+    print(f"Question: {question}")
+    try:
+        client = genai.Client(api_key=GOOGLE_API_KEY)
+        model='models/gemini-2.0-flash',
+        response = client.models.generate_content(
+            model='models/gemini-2.0-flash',
+            contents=types.Content(
+                parts=[
+                    types.Part(
+                    file_data=types.FileData(file_uri=video_url)
+                    ),
+                types.Part(text=question)
+                ]
+            )
+        )
+        print("--- Gemini Response Received ---")
+        if hasattr(response, 'text'):
+            return response.text
+        elif response.parts:
+             return "".join(part.text for part in response.parts if hasattr(part, 'text'))
+        else:
+             block_reason = ""
+             if response.prompt_feedback and response.prompt_feedback.block_reason:
+                 block_reason = f" Reason: {response.prompt_feedback.block_reason.name}"
+             return f"Model did not return text content.{block_reason}"
+    except Exception as e:
+        print(f"Error processing YouTube video '{video_url}' with Gemini: {e}")
+        return f"Sorry, an error occurred while analyzing the video. Please check the URL and ensure the video is accessible. Error details: {str(e)}"
+# Image Tools
+def understand_image(image_file_name: str) -> str:
+    """
+    Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.
+    Args:
+        image_file_name (str): The file name of the image to analyze. Which given as "file_name" parameter in the question.
+    Returns:
+        str: The response text generated by the Gemini model.
+    """
+    image_url = os.path.join("./GAIA_resource/" , image_file_name)
+    print("--- Analyzing Image ---")
+    print(f"Image URL/Path: {image_url}")
+    prompt = """
+        Analyze the image in detail and describe its contents in as much detail as possible.
+        For example, give someone a chess board and describe where each piece is.
+The description should include the following information:
+- General overview of the image
+- Details of important elements and features (e.g., location relationships, attributes, etc.)
+- Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)
+# Steps
+1. Examine the image as a whole and identify the main elements.
+2. Examine each element in detail and identify what it is.
+3. Develop a description of each element based on its characteristic relationships and positions.
+4. Finally, summarize the overall scene or situation.
+# Output Format
+Provide detailed descriptions in paragraphs of text, using bullet points where necessary.
+    """
+    try:
+        # Fetch the image data
+        if image_url.startswith("http"):
+            image_bytes = requests.get(image_url).content
+        else:
+            with open(image_url, "rb") as f:
+                image_bytes = f.read()
+        # Create image part
+        image_part = types.Part.from_bytes(
+            data=image_bytes,
+            mime_type="image/jpeg"
+        )
+        # Initialize the Gemini client
+        client = genai.Client(api_key=GOOGLE_API_KEY)
+        # Build contents with question text and image part
+        response = client.models.generate_content(
+            model="gemini-2.0-flash-exp",
+            contents=[
+                prompt,
+                image_part
+            ]
+        )
+        print("--- Gemini Response Received ---")
+        # Extract text from the response
+        if hasattr(response, 'text'):
+            return response.text
+        elif getattr(response, 'parts', None):
+            return "".join(part.text for part in response.parts if hasattr(part, 'text'))
+        else:
+            block_reason = ""
+            if response.prompt_feedback and response.prompt_feedback.block_reason:
+                block_reason = f" Reason: {response.prompt_feedback.block_reason.name}"
+            return f"Model did not return text content.{block_reason}"
+    except Exception as e:
+        print(f"Error processing image '{image_url}' with Gemini: {e}")
+        return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
+# Audio Tool
+def transcribe_audio(audio_path: str) -> str:
+    """
+    Given an audio file path or URL, uploads the file to Gemini API and generates a speech transcript.
+    Args:
+        audio_path (str): The URL or local file path of the audio to transcribe.
+    Returns:
+        str: A Markdown-formatted transcript of the speech, or an error message.
+    """
+    print("--- Transcribing Audio ---")
+    print(f"Audio Path: {audio_path}")
+    audio_path = os.path.join("./GAIA_resource/", audio_path)
+    try:
+        # Initialize Gemini client
+        client = genai.Client(api_key=GOOGLE_API_KEY)
+        # Upload the audio file
+        uploaded = client.files.upload(file=audio_path)
+        prompt = "Generate a transcript of the speech."
+        # Generate transcript
+        response = client.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[prompt, uploaded]
+        )
+        print("--- Gemini Response Received ---")
+        # Extract transcript text
+        if hasattr(response, 'text'):
+            transcript = response.text
+        elif getattr(response, 'parts', None):
+            transcript = "".join(part.text for part in response.parts if hasattr(part, 'text'))
+        else:
+            transcript = "Model did not return text content."
+        # Format as Markdown
+        markdown_transcript = (
+            "## Audio Transcription Result\n"
+            f"**Transcript:**\n{transcript}"
+        )
+        return markdown_transcript
+    except Exception as e:
+        error_msg = f"Error transcribing audio '{audio_path}': {str(e)}"
+        return f"**Error:** {error_msg}"
+# Excel Tool
+def excel_to_csv(excel_path: str) -> str:
+    """
+    Given an Excel file path or URL and an optional sheet name,
+    reads the spreadsheet using openpyxl and returns its contents as CSV text.
+    Args:
+        excel_path (str): The URL or local file path of the Excel file to convert.
+    Returns:
+        str: The CSV-formatted content of the sheet.
+    """
+    print("--- Converting Excel to CSV ---")
+    print(f"Excel Path: {excel_path}")
+    excel_path = os.path.join("./GAIA_resource/", excel_path)
+    try:
+        # Load workbook from URL or local file
+        if excel_path.startswith("http"):
+            response = requests.get(excel_path)
+            response.raise_for_status()
+            data_stream = BytesIO(response.content)
+            wb = load_workbook(filename=data_stream, data_only=True)
+        else:
+            wb = load_workbook(filename=excel_path, data_only=True)
+        # Select worksheet
+        ws = wb.active
+        # Build CSV lines manually
+        lines = []
+        for row in ws.iter_rows(values_only=True):
+            # Convert each cell to string, using empty string for None
+            str_cells = ["" if cell is None else str(cell) for cell in row]
+            # Join cells with commas
+            line = ",".join(str_cells)
+            lines.append(line)
+        # Combine all lines into one CSV string
+        print("Converted Excel to CSV result : ", lines)
+        return "\n".join(lines)
+    except Exception as e:
+        return f"Error converting Excel to CSV: {e}"
+data_analyzer_agent = LlmAgent(
+    model="gemini-2.5-flash-preview-04-17",
+    name="data_analyzer_agent",
+    description="When data is provided, analyze it and derive an appropriate answer.",
+    instruction="""
+# Steps
+1. **Data Review**: Understand the data provided and understand what it shows.
+2. **Prepare for Analysis**: If necessary, clean the data and prepare it for analysis.
+3. **Data Analysis**: Analyze the data using appropriate methods to find meaningful information and trends.
+4. **Interpretation**: Interpret the analysis results to answer questions and doubts.
+5. **Present Conclusions**: Present your conclusions and insights in a logical summary.
+# Output Format
+- State your conclusions in a short sentence, but make sure they are clear and specific.
+- If necessary, use tables and graphs to provide additional information.
+# Examples
+- **Input Data**:
+- Survey data on age, gender, occupation, and annual income
+- **Analysis Results**:
+- The older the person, the higher the annual income tends to be.
+- **Statement of conclusion**:
+- "The survey data shows that the older you are, the higher your average annual income is."
+# Notes
+- If your data set is very large, consider using sample data or segmenting your data for analysis.
+- Distinguish between qualitative and quantitative data and choose the appropriate analysis method for each.
+""",
+    tools=[excel_to_csv] # Provide the function directly
+)
+# Read file ascii
+def read_file_ascii(file_path: str) -> str:
+    """
+    Given a file URL or local file path, reads the file content and returns it as an ASCII string.
+    Args:
+        file_path (str): The URL or local file path of the file to read.
+    Returns:
+        str: The ASCII-decoded content of the file, or an error message on failure.
+    """
+    print("File Path : ", file_path)
+    file_path = os.path.join("./GAIA_resource/", file_path)
+    try:
+        # Load data from URL or local file
+        if file_path.startswith("http"):
+            response = requests.get(file_path)
+            response.raise_for_status()
+            data_bytes = response.content
+        else:
+            with open(file_path, "rb") as f:
+                data_bytes = f.read()
+        # Decode bytes to ASCII string, replacing errors
+        ascii_str = data_bytes.decode("ascii", errors="replace")
+        return ascii_str
+    except Exception as e:
+        return f"Error reading file as ASCII: {e}"
+# Call Agent Async
+async def call_agent_async(query: str, runner, user_id, session_id):
+  """Sends a query to the agent and prints the final response."""
+  print(f"\n>>> User Query: {query}")
+  # Prepare the user's message in ADK format
+  content = types.Content(role='user', parts=[types.Part(text=query)])
+  final_response_text = "Agent did not produce a final response." # Default
+  # Key Concept: run_async executes the agent logic and yields Events.
+  # We iterate through events to find the final answer.
+  async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=content):
+      # Key Concept: is_final_response() marks the concluding message for the turn.
+      if event.is_final_response():
+          if event.content and event.content.parts:
+             # Assuming text response in the first part
+             final_response_text = event.content.parts[0].text
+          elif event.actions and event.actions.escalate: # Handle potential errors/escalations
+             final_response_text = f"Agent escalated: {event.error_message or 'No specific message.'}"
+          # Add more checks here if needed (e.g., specific error codes)
+          break # Stop processing events once the final response is found
+  print(f"<<< Agent Response: {final_response_text}")
+  return final_response_text # Return the final response text
+# (Keep Constants as is)
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+#class BasicAgent:
+#    def __init__(self):
+#        print("BasicAgent initialized.")
+#    def __call__(self, question: str) -> str:
+#        print(f"Agent received question (first 50 chars): {question[:50]}...")
+#        #fixed_answer = "This is a default answer."
+#        #print(f"Agent returning fixed answer: {fixed_answer}")
+#
+#        return fixed_answer
+description_text = """
+You are GAIA Solver, a highly capable AI assistant designed to answer questions from the GAIA benchmark accurately and concisely using a suite of available tools. Your goal is to provide the precise answer in the requested format based *only* on the provided question text.
+"""
+instruction_text = """
+Thinking Process:
+1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
+2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
+3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
+4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use `web_search`, different tool prompts).
+5.  **Synthesize Answer:** Combine information. Use `execute_python_code` for final formatting/calculations.
+6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."
+Constraints:
+- Base actions *only* on the provided question text.
+- Adhere strictly to the requested output format.
+"""
+async def main():
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent ( modify this part to create your agent)
+    try:
+        root_agent = Agent(
+            name = "root_agent",
+            model = "gemini-2.5-pro-preview-03-25",
+            description = description_text,
+            instruction = instruction_text,
+            tools = [
+                agent_tool.AgentTool(agent=search_agent),
+                agent_tool.AgentTool(agent=coding_agent),
+                agent_tool.AgentTool(agent=code_execution_agent),
+                understand_youtube_video,
+                understand_image,
+                transcribe_audio,
+                agent_tool.AgentTool(agent=data_analyzer_agent),
+                read_file_ascii,
+            ]
+        )
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # 2. Fetch Questions
+    print(f"Fetching questions from: {questions_url}")
+    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+        print(f"Fetched {len(questions_data)} questions.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching questions: {e}")
+        return f"Error fetching questions: {e}", None
+    except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
+    except Exception as e:
+        print(f"An unexpected error occurred fetching questions: {e}")
+        return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run your Agent
+    results_log = []
+    answers_payload = []
+    print(f"Running agent on {len(questions_data)} questions...")
+    i = 0
+    for item in questions_data:
+        i += 1
+        if i < 12:
+            continue
+        elif i > 12:
+            break
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        question_file_name = item.get("file_name")
+        question_all = question_text + " file_name = " + question_file_name
+        if not task_id or question_text is None:
+            print(f"Skipping item with missing task_id or question: {item}")
+            continue
+        try:
+            APP_NAME = "gaia_agent"
+            USER_ID = "user_1"
+            SESSION_ID = item.get("task_id")
+            session_service = InMemorySessionService()
+            session = session_service.create_session(
+                app_name=APP_NAME,
+                user_id=USER_ID,
+                session_id=SESSION_ID
+                )
+            runner = Runner(
+                agent=root_agent, # The agent we want to run
+                app_name=APP_NAME,   # Associates runs with our app
+                session_service=session_service # Uses our session manager
+            )
+            submitted_answer = await call_agent_async(question_all,
+                                       runner=runner,
+                                       user_id=USER_ID,
+                                       session_id=SESSION_ID)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+        except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+    if not answers_payload:
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    #submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    #status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    #print(status_update)
+# Execution starts here when the script is run directly.
+if __name__ == "__main__":
+    # Use asyncio.run() to execute the asynchronous main function.
+    # Without it, async def main() would never actually run.
+    try:
+        asyncio.run(main())
+    except Exception as e:
+        print(f"An error occurred during the asyncio run: {e}")

app.py CHANGED Viewed

@@ -4,22 +4,444 @@ import requests
 import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
@@ -38,9 +460,32 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -76,11 +521,33 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -90,6 +557,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
@@ -145,11 +613,15 @@ with gr.Blocks() as demo:
     gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
@@ -193,4 +665,5 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import inspect
 import pandas as pd
+import asyncio
+from google import genai
+from google.adk.agents import Agent
+from google.adk.runners import Runner
+from google.adk.sessions import InMemorySessionService
+from google.genai import types
+from google.adk.tools import agent_tool
+from google.adk.agents import Agent
+from google.adk.tools import google_search, built_in_code_execution
+from google.adk.agents import LlmAgent
+from openpyxl import load_workbook
+import warnings
+# Silence every warning for the whole process.
+warnings.filterwarnings("ignore")
+import logging
+# Only ERROR and above reach the root logger.
+logging.basicConfig(level=logging.ERROR)
+# Load API KEYs
+# Bind the value to a module-level name; a bare os.getenv() call discards its
+# result and leaves GOOGLE_API_KEY undefined.
+GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
+# Agent Tools
+# Calculator sub-agent: relies on the built-in code execution tool and is
+# instructed to answer with the bare numerical result.
+coding_agent = LlmAgent(
+    name='CodeAgent',
+    model='gemini-2.0-flash',
+    description="Executes Python code to perform calculations.",
+    instruction="""You are a calculator agent.
+    When given a mathematical expression, write and execute Python code to calculate the result.
+    Return only the final numerical result as plain text, without markdown or code blocks.
+    """,
+    tools=[built_in_code_execution],
+)
+# Sub-agent that runs code it is handed, unmodified, through the built-in code
+# execution tool. It needs a name distinct from coding_agent ('CodeAgent'):
+# both agents are exposed as tools of the same root agent, and two tools
+# sharing one name cannot be told apart by the model.
+code_execution_agent = LlmAgent(
+    model='gemini-2.0-flash',
+    name='CodeExecutionAgent',
+    instruction="""
+    You're a specialist in Code Execution. Execute Python code to get the result.
+    Return only the final numerical result as plain text, without markdown or code blocks.
+    If you given the python code, do not add, subtract any codes from original one.
+    """,
+    description="Executes Python code. It will not generate code.",
+    tools=[built_in_code_execution],
+)
+# Web-search sub-agent backed by the pre-built google_search tool.
+search_agent = Agent(
+    model="gemini-2.0-flash",
+    name="basic_search_agent",
+    instruction="I can answer your questions by searching the internet. Just ask me anything!",
+    description="Agent to answer questions using Google Search.",
+    # google_search lets the agent perform Google searches.
+    tools=[google_search],
+)
+# YouTube Tools
+def understand_youtube_video(video_url: str, question: str) -> str:
+    """
+    Ask Gemini a question about a YouTube video.
+
+    The video URL is handed to the model as a file URI next to the question text.
+
+    Args:
+        video_url (str): The URL of the YouTube video you want to analyze
+            (e.g. "https://www.youtube.com/watch?v=...").
+        question (str): The specific question about the video content.
+    Returns:
+        str: The answer generated by the Gemini model, a "Model did not return
+        text content." notice (with the block reason when one is reported), or
+        an error message if processing fails.
+    """
+    print("--- Analyzing YouTube Video ---")
+    print(f"URL: {video_url}")
+    print(f"Question: {question}")
+    try:
+        # Read the key from the environment at call time.
+        client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
+        response = client.models.generate_content(
+            model='models/gemini-2.0-flash',
+            contents=types.Content(
+                parts=[
+                    types.Part(file_data=types.FileData(file_uri=video_url)),
+                    types.Part(text=question),
+                ]
+            )
+        )
+        print("--- Gemini Response Received ---")
+        # Test the value of `text`, not its presence: the attribute can exist
+        # while being None/empty, and the tool must always return a string.
+        answer = getattr(response, 'text', None)
+        if answer:
+            return answer
+        joined = "".join(
+            part.text
+            for part in (getattr(response, 'parts', None) or [])
+            if getattr(part, 'text', None)
+        )
+        if joined:
+            return joined
+        block_reason = ""
+        feedback = getattr(response, 'prompt_feedback', None)
+        if feedback and feedback.block_reason:
+            block_reason = f" Reason: {feedback.block_reason.name}"
+        return f"Model did not return text content.{block_reason}"
+    except Exception as e:
+        # Tool boundary: report the failure to the calling agent as text.
+        print(f"Error processing YouTube video '{video_url}' with Gemini: {e}")
+        return f"Sorry, an error occurred while analyzing the video. Please check the URL and ensure the video is accessible. Error details: {str(e)}"
+# Image Tools
+def understand_image(image_file_name: str) -> str:
+    """
+    Given an image file, ask Gemini to describe its contents in as much detail as possible.
+
+    Args:
+        image_file_name (str): The file name of the image to analyze, given as the
+            "file_name" parameter in the question. An http(s) URL is fetched
+            directly; any other value is looked up under ./GAIA_resource/.
+    Returns:
+        str: The response text generated by the Gemini model, or an error message.
+    """
+    import mimetypes
+    # Decide between URL and local file before prefixing the resource directory:
+    # once joined, the value can never start with "http".
+    is_remote = image_file_name.startswith(("http://", "https://"))
+    if is_remote:
+        image_url = image_file_name
+    else:
+        image_url = os.path.join("./GAIA_resource/" , image_file_name)
+        # Files fetched from the GAIA dataset may sit under 2023/validation/.
+        nested = os.path.join("./GAIA_resource/", "2023/validation", image_file_name)
+        if not os.path.exists(image_url) and os.path.exists(nested):
+            image_url = nested
+    print("--- Analyzing Image ---")
+    print(f"Image URL/Path: {image_url}")
+    prompt = """
+        Analyze the image in detail and describe its contents in as much detail as possible.
+        For example, give someone a chess board and describe where each piece is.
+The description should include the following information:
+- General overview of the image
+- Details of important elements and features (e.g., location relationships, attributes, etc.)
+- Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)
+# Steps
+1. Examine the image as a whole and identify the main elements.
+2. Examine each element in detail and identify what it is.
+3. Develop a description of each element based on its characteristic relationships and positions.
+4. Finally, summarize the overall scene or situation.
+# Output Format
+Provide detailed descriptions in paragraphs of text, using bullet points where necessary.
+    """
+    try:
+        # Fetch the image data
+        if is_remote:
+            reply = requests.get(image_url, timeout=30)
+            reply.raise_for_status()
+            image_bytes = reply.content
+        else:
+            with open(image_url, "rb") as f:
+                image_bytes = f.read()
+        # Label the bytes with the type implied by the file extension (PNG, GIF, ...),
+        # keeping JPEG as the fallback when the extension is unknown.
+        mime_type = mimetypes.guess_type(image_url)[0] or "image/jpeg"
+        image_part = types.Part.from_bytes(data=image_bytes, mime_type=mime_type)
+        # Read the key from the environment at call time.
+        client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
+        # Build contents with the prompt text and the image part
+        response = client.models.generate_content(
+            model="gemini-2.0-flash-exp",
+            contents=[
+                prompt,
+                image_part
+            ]
+        )
+        print("--- Gemini Response Received ---")
+        # Test the value of `text`, not its presence, so the tool never returns None.
+        description = getattr(response, 'text', None)
+        if description:
+            return description
+        joined = "".join(
+            part.text
+            for part in (getattr(response, 'parts', None) or [])
+            if getattr(part, 'text', None)
+        )
+        if joined:
+            return joined
+        block_reason = ""
+        feedback = getattr(response, 'prompt_feedback', None)
+        if feedback and feedback.block_reason:
+            block_reason = f" Reason: {feedback.block_reason.name}"
+        return f"Model did not return text content.{block_reason}"
+    except Exception as e:
+        # Tool boundary: report the failure to the calling agent as text.
+        print(f"Error processing image '{image_url}' with Gemini: {e}")
+        return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
+# Audio Tool
+def transcribe_audio(audio_path: str) -> str:
+    """
+    Upload a local audio file to the Gemini API and generate a speech transcript.
+
+    Args:
+        audio_path (str): File name of the audio to transcribe, looked up under
+            ./GAIA_resource/.
+    Returns:
+        str: A Markdown-formatted transcript of the speech, or an error message.
+    """
+    print("--- Transcribing Audio ---")
+    print(f"Audio Path: {audio_path}")
+    file_name = audio_path
+    audio_path = os.path.join("./GAIA_resource/", file_name)
+    # Files fetched from the GAIA dataset may sit under 2023/validation/.
+    nested = os.path.join("./GAIA_resource/", "2023/validation", file_name)
+    if not os.path.exists(audio_path) and os.path.exists(nested):
+        audio_path = nested
+    try:
+        # Read the key from the environment at call time.
+        client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
+        # Upload the audio file
+        uploaded = client.files.upload(file=audio_path)
+        prompt = "Generate a transcript of the speech."
+        # Generate transcript
+        response = client.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[prompt, uploaded]
+        )
+        print("--- Gemini Response Received ---")
+        # Test the value of `text`, not its presence, so that a None text does
+        # not end up rendered as the literal "None" in the transcript.
+        transcript = getattr(response, 'text', None)
+        if not transcript:
+            transcript = "".join(
+                part.text
+                for part in (getattr(response, 'parts', None) or [])
+                if getattr(part, 'text', None)
+            )
+        if not transcript:
+            transcript = "Model did not return text content."
+        # Format as Markdown
+        return (
+            "## Audio Transcription Result\n"
+            f"**Transcript:**\n{transcript}"
+        )
+    except Exception as e:
+        # Tool boundary: report the failure to the calling agent as text.
+        error_msg = f"Error transcribing audio '{audio_path}': {str(e)}"
+        return f"**Error:** {error_msg}"
+# Excel Tool
+def excel_to_csv(excel_path: str) -> str:
+    """
+    Read the active sheet of an Excel workbook with openpyxl and return it as CSV text.
+
+    Args:
+        excel_path (str): An http(s) URL of the workbook, or a file name looked
+            up under ./GAIA_resource/.
+    Returns:
+        str: The CSV-formatted content of the active sheet, or an error message.
+    """
+    import csv
+    from io import BytesIO, StringIO
+    print("--- Converting Excel to CSV ---")
+    print(f"Excel Path: {excel_path}")
+    try:
+        # Decide between URL and local file before prefixing the resource
+        # directory: once joined, the value can never start with "http".
+        if excel_path.startswith(("http://", "https://")):
+            response = requests.get(excel_path, timeout=30)
+            response.raise_for_status()
+            workbook_source = BytesIO(response.content)
+        else:
+            workbook_source = os.path.join("./GAIA_resource/", excel_path)
+            # Files fetched from the GAIA dataset may sit under 2023/validation/.
+            nested = os.path.join("./GAIA_resource/", "2023/validation", excel_path)
+            if not os.path.exists(workbook_source) and os.path.exists(nested):
+                workbook_source = nested
+        # data_only=True yields cached cell values instead of formulas.
+        wb = load_workbook(filename=workbook_source, data_only=True)
+        ws = wb.active
+        # csv.writer quotes cells containing commas, quotes or newlines, which a
+        # plain ",".join() would silently turn into extra columns/rows.
+        buffer = StringIO()
+        writer = csv.writer(buffer, lineterminator="\n")
+        for row in ws.iter_rows(values_only=True):
+            writer.writerow(["" if cell is None else str(cell) for cell in row])
+        # Drop only the terminator of the last row, as the joined form had none.
+        csv_text = buffer.getvalue().removesuffix("\n")
+        print("Converted Excel to CSV result : ", csv_text)
+        return csv_text
+    except Exception as e:
+        # Tool boundary: report the failure to the calling agent as text.
+        return f"Error converting Excel to CSV: {e}"
+# Data-analysis sub-agent; excel_to_csv is handed over as a plain function tool
+# so the agent can load spreadsheets itself.
+data_analyzer_agent = LlmAgent(
+    name="data_analyzer_agent",
+    model="gemini-2.5-flash-preview-04-17",
+    tools=[excel_to_csv],
+    description="When data is provided, analyze it and derive an appropriate answer.",
+    instruction="""
+# Steps
+1. **Data Review**: Understand the data provided and understand what it shows.
+2. **Prepare for Analysis**: If necessary, clean the data and prepare it for analysis.
+3. **Data Analysis**: Analyze the data using appropriate methods to find meaningful information and trends.
+4. **Interpretation**: Interpret the analysis results to answer questions and doubts.
+5. **Present Conclusions**: Present your conclusions and insights in a logical summary.
+# Output Format
+- State your conclusions in a short sentence, but make sure they are clear and specific.
+- If necessary, use tables and graphs to provide additional information.
+# Examples
+- **Input Data**:
+- Survey data on age, gender, occupation, and annual income
+- **Analysis Results**:
+- The older the person, the higher the annual income tends to be.
+- **Statement of conclusion**:
+- "The survey data shows that the older you are, the higher your average annual income is."
+# Notes
+- If your data set is very large, consider using sample data or segmenting your data for analysis.
+- Distinguish between qualitative and quantitative data and choose the appropriate analysis method for each.
+""",
+)
+# Read file ascii
+def read_file_ascii(file_path: str) -> str:
+    """
+    Read a file and return its content decoded as ASCII.
+
+    Args:
+        file_path (str): An http(s) URL of the file, or a file name looked up
+            under ./GAIA_resource/.
+    Returns:
+        str: The ASCII-decoded content (undecodable bytes are replaced), or an
+        error message on failure.
+    """
+    print("File Path : ", file_path)
+    try:
+        # Decide between URL and local file before prefixing the resource
+        # directory: once joined, the value can never start with "http".
+        if file_path.startswith(("http://", "https://")):
+            response = requests.get(file_path, timeout=30)
+            response.raise_for_status()
+            data_bytes = response.content
+        else:
+            local_path = os.path.join("./GAIA_resource/", file_path)
+            # Files fetched from the GAIA dataset may sit under 2023/validation/.
+            nested = os.path.join("./GAIA_resource/", "2023/validation", file_path)
+            if not os.path.exists(local_path) and os.path.exists(nested):
+                local_path = nested
+            with open(local_path, "rb") as f:
+                data_bytes = f.read()
+        # Decode bytes to an ASCII string, replacing anything outside ASCII.
+        return data_bytes.decode("ascii", errors="replace")
+    except Exception as e:
+        # Tool boundary: report the failure to the calling agent as text.
+        return f"Error reading file as ASCII: {e}"
+# Call Agent Async
+async def call_agent_async(query: str, runner, user_id, session_id):
+    """Send a query through the runner, print the final response and return it.
+
+    Events yielded by runner.run_async are consumed until the first one flagged
+    as final. Its first content part's text becomes the answer; a final event
+    without content but with an escalate action yields an "Agent escalated"
+    message instead. A default notice is returned when neither applies.
+    """
+    print(f"\n>>> User Query: {query}")
+    # Wrap the user's text in the ADK message format.
+    user_message = types.Content(role='user', parts=[types.Part(text=query)])
+    answer = "Agent did not produce a final response."
+    events = runner.run_async(user_id=user_id, session_id=session_id, new_message=user_message)
+    async for event in events:
+        # Intermediate events are ignored.
+        if not event.is_final_response():
+            continue
+        if event.content and event.content.parts:
+            # The text response is assumed to be in the first part.
+            answer = event.content.parts[0].text
+        elif event.actions and event.actions.escalate:
+            answer = f"Agent escalated: {event.error_message or 'No specific message.'}"
+        # Nothing after the final response is processed.
+        break
+    print(f"<<< Agent Response: {answer}")
+    return answer
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# for GAIA Repo
+# Dataset repository id passed to snapshot_download by download_gaia_validation.
+GAIA_REPO_ID        = "gaia-benchmark/GAIA"
+# Sub-directory of the dataset that is downloaded (used as the allow pattern
+# and appended to the local directory to form the target path).
+GAIA_VALIDATION_DIR = "2023/validation"
+# Default local directory the dataset snapshot is written into.
+LOCAL_GAIA_DIR      = "GAIA_resource"
+# --- GAIA Data Download Utility ---
+def download_gaia_validation(local_dir: str = LOCAL_GAIA_DIR):
+    """
+        Fetch the validation split of the Hugging Face GAIA dataset into
+        local_dir/2023/validation/.
+        The download is skipped when that directory already exists and is not empty.
+    """
+    # NOTE(review): no import of snapshot_download is visible in this part of
+    # the file - confirm it is imported (it is provided by huggingface_hub).
+    target_path = os.path.join(local_dir, GAIA_VALIDATION_DIR)
+    already_present = os.path.isdir(target_path) and os.listdir(target_path)
+    if already_present:
+        print(f"GAIA validation data already exists at {target_path}")
+        return
+    os.makedirs(local_dir, exist_ok=True)
+    print(f"Downloading GAIA validation data into {local_dir} ...")
+    # Restrict the snapshot to the validation folder of the dataset repo.
+    download_options = {
+        "repo_id": GAIA_REPO_ID,
+        "repo_type": "dataset",
+        "allow_patterns": [f"{GAIA_VALIDATION_DIR}/*"],
+        "local_dir": local_dir,
+        "local_dir_use_symlinks": False,
+    }
+    snapshot_download(**download_options)
+    print(f"Downloaded GAIA validation data to {target_path}")
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+#class BasicAgent:
+#    def __init__(self):
+#        print("BasicAgent initialized.")
+#    def __call__(self, question: str) -> str:
+#        print(f"Agent received question (first 50 chars): {question[:50]}...")
+#        #fixed_answer = "This is a default answer."
+#        #print(f"Agent returning fixed answer: {fixed_answer}")
+#
+#        return fixed_answer
+# Description given to the root agent (passed as `description=` when the
+# root Agent is built in run_and_submit_all).
+description_text = """
+You are GAIA Solver, a highly capable AI assistant designed to answer questions from the GAIA benchmark accurately and concisely using a suite of available tools. Your goal is to provide the precise answer in the requested format based *only* on the provided question text.
+"""
+# Instruction prompt given to the root agent (passed as `instruction=`).
+# NOTE(review): the prompt mentions `web_search` and `execute_python_code`,
+# which do not match the tool/agent names registered on the root agent
+# (basic_search_agent, CodeAgent, ...) - confirm the model maps them correctly.
+instruction_text = """
+Thinking Process:
+1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
+2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
+3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
+4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use `web_search`, different tool prompts).
+5.  **Synthesize Answer:** Combine information. Use `execute_python_code` for final formatting/calculations.
+6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."
+Constraints:
+- Base actions *only* on the provided question text.
+- Adhere strictly to the requested output format.
+"""
+async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 0. Download GAIA data
+    try:
+        download_gaia_validation()
+    except Exception as e:
+        err = f"Error downloading GAIA validation data: {e}"
+        print(err)
+        return err, None
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        root_agent = Agent(
+            name = "root_agent",
+            model = "gemini-2.5-pro-preview-03-25",
+            description = description_text,
+            instruction = instruction_text,
+            tools = [
+                agent_tool.AgentTool(agent=search_agent),
+                agent_tool.AgentTool(agent=coding_agent),
+                agent_tool.AgentTool(agent=code_execution_agent),
+                understand_youtube_video,
+                understand_image,
+                transcribe_audio,
+                agent_tool.AgentTool(agent=data_analyzer_agent),
+                read_file_ascii,
+            ]
+        )
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        # Build the prompt only after validation: this code sits outside the
+        # try block, so concatenating a None question or file name would raise
+        # TypeError and abort the whole run. A missing file name becomes "".
+        question_file_name = item.get("file_name") or ""
+        question_all = f"{question_text} file_name = {question_file_name}"
         try:
+            APP_NAME = "gaia_agent"
+            USER_ID = "user_1"
+            SESSION_ID = item.get("task_id")
+            session_service = InMemorySessionService()
+            session = session_service.create_session(
+                app_name=APP_NAME,
+                user_id=USER_ID,
+                session_id=SESSION_ID
+                )
+            runner = Runner(
+                agent=root_agent, # The agent we want to run
+                app_name=APP_NAME,   # Associates runs with our app
+                session_service=session_service # Uses our session manager
+            )
+            submitted_answer = await call_agent_async(question_all,
+                                       runner=runner,
+                                       user_id=USER_ID,
+                                       session_id=SESSION_ID)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
+        **Introduction:**
+        This is an agent for GAIA benchmark.
+        Built with Google ADK (Agent Development Kit)
         **Instructions:**
+        Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

excel_test.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import os
+import requests
+from openpyxl import load_workbook
+# Excel Tool
+def excel_to_csv(excel_path: str) -> str:
+    """
+    Read the active sheet of an Excel workbook with openpyxl and return it as CSV text.
+
+    Args:
+        excel_path (str): An http(s) URL of the workbook, or a file name looked
+            up under ./GAIA_resource/.
+    Returns:
+        str: The CSV-formatted content of the active sheet, or an error message.
+    """
+    import csv
+    from io import BytesIO, StringIO
+    print("--- Converting Excel to CSV ---")
+    print(f"Excel Path: {excel_path}")
+    try:
+        # Decide between URL and local file before prefixing the resource
+        # directory: once joined, the value can never start with "http".
+        if excel_path.startswith(("http://", "https://")):
+            response = requests.get(excel_path, timeout=30)
+            response.raise_for_status()
+            workbook_source = BytesIO(response.content)
+        else:
+            workbook_source = os.path.join("./GAIA_resource/", excel_path)
+            # Files fetched from the GAIA dataset may sit under 2023/validation/.
+            nested = os.path.join("./GAIA_resource/", "2023/validation", excel_path)
+            if not os.path.exists(workbook_source) and os.path.exists(nested):
+                workbook_source = nested
+        # data_only=True yields cached cell values instead of formulas.
+        wb = load_workbook(filename=workbook_source, data_only=True)
+        ws = wb.active
+        # csv.writer quotes cells containing commas, quotes or newlines, which a
+        # plain ",".join() would silently turn into extra columns/rows.
+        buffer = StringIO()
+        writer = csv.writer(buffer, lineterminator="\n")
+        for row in ws.iter_rows(values_only=True):
+            writer.writerow(["" if cell is None else str(cell) for cell in row])
+        # Drop only the terminator of the last row, as the joined form had none.
+        csv_text = buffer.getvalue().removesuffix("\n")
+        print("Converted Excel to CSV result : ", csv_text)
+        return csv_text
+    except Exception as e:
+        # Report the failure as text instead of raising.
+        return f"Error converting Excel to CSV: {e}"
+excel_to_csv("7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx")

requirements.txt CHANGED Viewed

@@ -1,2 +1,5 @@
 gradio
-requests

 gradio
+requests
+google-genai
+google.adk
+openpyxl