Final_Assignment_1

Running

App Files Files Community

hgmiya committed about 6 hours ago

Commit

f6a50f3

1 Parent(s): b1628e5

Implement GAIA Solver: Add agent tools for code execution, YouTube analysis, image understanding, audio transcription, and Excel conversion. Initialize agents and set up asynchronous processing for question handling.

Browse files

Files changed (9) hide show

.gitattributes +1 -2
GAIA_resource/1f975693-876d-457b-a649-393859e79bf3.mp3 +0 -3
GAIA_resource/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx +0 -0
GAIA_resource/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 +0 -3
GAIA_resource/cca530fc-4052-43b2-b130-b30968d8aa44.png +0 -0
GAIA_resource/f918266a-b3e0-4914-865d-4faa564f1aef.py +0 -35
__init__.py +1 -1
agent.py → agent_dev.py +76 -92
app.py +70 -82

.gitattributes CHANGED Viewed

@@ -33,5 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-GAIA_resource/1f975693-876d-457b-a649-393859e79bf3.mp3 filter=lfs diff=lfs merge=lfs -text
-GAIA_resource/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

GAIA_resource/1f975693-876d-457b-a649-393859e79bf3.mp3 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:200f767e732b49efef5c05d128903ee4d2c34e66fdce7f5593ac123b2e637673
-size 280868

GAIA_resource/7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx DELETED Viewed

Binary file (5.29 kB)

GAIA_resource/99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b218c951c1f888f0bbe6f46c080f57afc7c9348fffc7ba4da35749ff1e2ac40f
-size 179304

GAIA_resource/cca530fc-4052-43b2-b130-b30968d8aa44.png DELETED Viewed

Binary file (63.1 kB)

GAIA_resource/f918266a-b3e0-4914-865d-4faa564f1aef.py DELETED Viewed

@@ -1,35 +0,0 @@
-from random import randint
-import time
-class UhOh(Exception):
-    pass
-class Hmm:
-    def __init__(self):
-        self.value = randint(-100, 100)
-    def Yeah(self):
-        if self.value == 0:
-            return True
-        else:
-            raise UhOh()
-def Okay():
-    while True:
-        yield Hmm()
-def keep_trying(go, first_try=True):
-    maybe = next(go)
-    try:
-        if maybe.Yeah():
-            return maybe.value
-    except UhOh:
-        if first_try:
-            print("Working...")
-            print("Please wait patiently...")
-        time.sleep(0.1)
-        return keep_trying(go, first_try=False)
-if __name__ == "__main__":
-    go = Okay()
-    print(f"{keep_trying(go)}")

__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- from . import agent


1	+ from .adk_web import agent

agent.py → agent_dev.py RENAMED Viewed

@@ -16,6 +16,7 @@ from google.adk.agents import Agent
 from google.adk.tools import google_search, built_in_code_execution
 from google.adk.agents import LlmAgent
 from openpyxl import load_workbook
 import warnings
@@ -102,7 +103,9 @@ def understand_youtube_video(video_url: str, question: str) -> str:
         )
         print("--- Gemini Response Received ---")
         if hasattr(response, 'text'):
             return response.text
         elif response.parts:
              return "".join(part.text for part in response.parts if hasattr(part, 'text'))
@@ -122,41 +125,39 @@ def understand_image(image_file_name: str) -> str:
     Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.
     Args:
-        image_file_name (str): The file name of the image to analyze. Which given as "file_name" parameter in the question.
     Returns:
         str: The response text generated by the Gemini model.
     """
-    image_url = os.path.join("./GAIA_resource/" , image_file_name)
     print("--- Analyzing Image ---")
-    print(f"Image URL/Path: {image_url}")
     prompt = """
         Analyze the image in detail and describe its contents in as much detail as possible.
         For example, give someone a chess board and describe where each piece is.
-The description should include the following information:
-- General overview of the image
-- Details of important elements and features (e.g., location relationships, attributes, etc.)
-- Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)
-# Steps
-1. Examine the image as a whole and identify the main elements.
-2. Examine each element in detail and identify what it is.
-3. Develop a description of each element based on its characteristic relationships and positions.
-4. Finally, summarize the overall scene or situation.
-# Output Format
-Provide detailed descriptions in paragraphs of text, using bullet points where necessary.
     """
     try:
         # Fetch the image data
-        if image_url.startswith("http"):
-            image_bytes = requests.get(image_url).content
         else:
-            with open(image_url, "rb") as f:
                 image_bytes = f.read()
         # Create image part
@@ -177,8 +178,10 @@ Provide detailed descriptions in paragraphs of text, using bullet points where n
         )
         print("--- Gemini Response Received ---")
         # Extract text from the response
         if hasattr(response, 'text'):
             return response.text
         elif getattr(response, 'parts', None):
             return "".join(part.text for part in response.parts if hasattr(part, 'text'))
@@ -189,7 +192,7 @@ Provide detailed descriptions in paragraphs of text, using bullet points where n
             return f"Model did not return text content.{block_reason}"
     except Exception as e:
-        print(f"Error processing image '{image_url}' with Gemini: {e}")
         return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
 # Audio Tool
@@ -205,7 +208,6 @@ def transcribe_audio(audio_path: str) -> str:
     """
     print("--- Transcribing Audio ---")
     print(f"Audio Path: {audio_path}")
-    audio_path = os.path.join("./GAIA_resource/", audio_path)
     try:
         # Initialize Gemini client
@@ -229,6 +231,8 @@ def transcribe_audio(audio_path: str) -> str:
         else:
             transcript = "Model did not return text content."
         # Format as Markdown
         markdown_transcript = (
             "## Audio Transcription Result\n"
@@ -258,14 +262,7 @@ def excel_to_csv(excel_path: str) -> str:
     excel_path = os.path.join("./GAIA_resource/", excel_path)
     try:
-        # Load workbook from URL or local file
-        if excel_path.startswith("http"):
-            response = requests.get(excel_path)
-            response.raise_for_status()
-            data_stream = BytesIO(response.content)
-            wb = load_workbook(filename=data_stream, data_only=True)
-        else:
-            wb = load_workbook(filename=excel_path, data_only=True)
         # Select worksheet
         ws = wb.active
@@ -286,70 +283,56 @@ def excel_to_csv(excel_path: str) -> str:
     except Exception as e:
         return f"Error converting Excel to CSV: {e}"
-data_analyzer_agent = LlmAgent(
-    model="gemini-2.5-flash-preview-04-17",
-    name="data_analyzer_agent",
-    description="When data is provided, analyze it and derive an appropriate answer.",
-    instruction="""
-# Steps
-1. **Data Review**: Understand the data provided and understand what it shows.
-2. **Prepare for Analysis**: If necessary, clean the data and prepare it for analysis.
-3. **Data Analysis**: Analyze the data using appropriate methods to find meaningful information and trends.
-4. **Interpretation**: Interpret the analysis results to answer questions and doubts.
-5. **Present Conclusions**: Present your conclusions and insights in a logical summary.
-# Output Format
-- State your conclusions in a short sentence, but make sure they are clear and specific.
-- If necessary, use tables and graphs to provide additional information.
-# Examples
-- **Input Data**:
-- Survey data on age, gender, occupation, and annual income
-- **Analysis Results**:
-- The older the person, the higher the annual income tends to be.
-- **Statement of conclusion**:
-- "The survey data shows that the older you are, the higher your average annual income is."
-# Notes
-- If your data set is very large, consider using sample data or segmenting your data for analysis.
-- Distinguish between qualitative and quantitative data and choose the appropriate analysis method for each.
-""",
-    tools=[excel_to_csv] # Provide the function directly
-)
-# Read file ascii
-def read_file_ascii(file_path: str) -> str:
     """
-    Given a file URL or local file path, reads the file content and returns it as an ASCII string.
     Args:
-        file_path (str): The URL or local file path of the file to read.
     Returns:
-        str: The ASCII-decoded content of the file, or an error message on failure.
     """
     print("File Path : ", file_path)
-    file_path = os.path.join("./GAIA_resource/", file_path)
     try:
-        # Load data from URL or local file
-        if file_path.startswith("http"):
-            response = requests.get(file_path)
-            response.raise_for_status()
-            data_bytes = response.content
-        else:
-            with open(file_path, "rb") as f:
-                data_bytes = f.read()
         # Decode bytes to ASCII string, replacing errors
-        ascii_str = data_bytes.decode("ascii", errors="replace")
-        return ascii_str
     except Exception as e:
-        return f"Error reading file as ASCII: {e}"
 # Call Agent Async
 async def call_agent_async(query: str, runner, user_id, session_id):
   """Sends a query to the agent and prints the final response."""
@@ -405,8 +388,8 @@ Thinking Process:
 1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
 2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
 3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
-4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use `web_search`, different tool prompts).
-5.  **Synthesize Answer:** Combine information. Use `execute_python_code` for final formatting/calculations.
 6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."
 Constraints:
@@ -435,8 +418,9 @@ async def main():
                 understand_youtube_video,
                 understand_image,
                 transcribe_audio,
-                agent_tool.AgentTool(agent=data_analyzer_agent),
-                read_file_ascii,
             ]
         )
     except Exception as e:
@@ -469,17 +453,14 @@ async def main():
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    i = 0
     for item in questions_data:
-        i += 1
-        if i < 12:
-            continue
-        elif i > 12:
-            break
         task_id = item.get("task_id")
         question_text = item.get("question")
-        question_file_name = item.get("file_name")
-        question_all = question_text + " file_name = " + question_file_name
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
@@ -500,16 +481,19 @@ async def main():
                 app_name=APP_NAME,   # Associates runs with our app
                 session_service=session_service # Uses our session manager
             )
-            submitted_answer = await call_agent_async(question_all,
                                        runner=runner,
                                        user_id=USER_ID,
-                                       session_id=SESSION_ID)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")

 from google.adk.tools import google_search, built_in_code_execution
 from google.adk.agents import LlmAgent
+from huggingface_hub import snapshot_download
 from openpyxl import load_workbook
 import warnings
         )
         print("--- Gemini Response Received ---")
         if hasattr(response, 'text'):
+            print("Video Description : ", response.text)
             return response.text
         elif response.parts:
              return "".join(part.text for part in response.parts if hasattr(part, 'text'))
     Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.
     Args:
+        image_file_name (str): The file name of the image to analyze.
     Returns:
         str: The response text generated by the Gemini model.
     """
     print("--- Analyzing Image ---")
+    print(f"Image URL/Path: {image_file_name}")
     prompt = """
         Analyze the image in detail and describe its contents in as much detail as possible.
         For example, give someone a chess board and describe where each piece is.
+        The description should include the following information:
+        - General overview of the image
+        - Details of important elements and features (e.g., location relationships, attributes, etc.)
+        - Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)
+        # Steps
+        1. Examine the image as a whole and identify the main elements.
+        2. Examine each element in detail and identify what it is.
+        3. Develop a description of each element based on its characteristic relationships and positions.
+        4. Finally, summarize the overall scene or situation.
+        # Output Format
+        Provide detailed descriptions in paragraphs of text, using bullet points where necessary.
     """
     try:
         # Fetch the image data
+        if image_file_name.startswith("http"):
+            image_bytes = requests.get(image_file_name).content
         else:
+            with open(image_file_name, "rb") as f:
                 image_bytes = f.read()
         # Create image part
         )
         print("--- Gemini Response Received ---")
         # Extract text from the response
         if hasattr(response, 'text'):
+            print("Image Description : ", response.text)
             return response.text
         elif getattr(response, 'parts', None):
             return "".join(part.text for part in response.parts if hasattr(part, 'text'))
             return f"Model did not return text content.{block_reason}"
     except Exception as e:
+        print(f"Error processing image '{image_file_name}' with Gemini: {e}")
         return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
 # Audio Tool
     """
     print("--- Transcribing Audio ---")
     print(f"Audio Path: {audio_path}")
     try:
         # Initialize Gemini client
         else:
             transcript = "Model did not return text content."
+        print("Transcript : ", transcript)
         # Format as Markdown
         markdown_transcript = (
             "## Audio Transcription Result\n"
     excel_path = os.path.join("./GAIA_resource/", excel_path)
     try:
+        wb = load_workbook(filename=excel_path, data_only=True)
         # Select worksheet
         ws = wb.active
     except Exception as e:
         return f"Error converting Excel to CSV: {e}"
+# Read text file
+def LoadTextFileTool(file_path: str) -> str:
     """
+    This tool loads any text file
     Args:
+        file_path (str): File Path
     Returns:
+        str: Text file contents.
     """
+    print("---Load Text File Tool---")
     print("File Path : ", file_path)
     try:
         # Read the file as UTF-8 text
+        with open(file_path, 'r', encoding='utf-8') as file:
+            return file.read()
     except Exception as e:
+        return f"Error reading text file: {e}"
+# Get task file
+def GetTaskFileTool(file_name: str, task_id: str) -> str:
+    """
+        This tool downloads the file content associated with the given task_id if exists. Returns absolute file path.
+        Args:
+            task_id (str): Task id
+            file_name (str) File name
+        Returns:
+            str: absolute file path
+    """
+    print("---Get Task File Tool---")
+    print("File Name : ", file_name)
+    try:
+        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
+        response.raise_for_status()
+        with open(file_name, 'wb') as file:
+            file.write(response.content)
+        return os.path.abspath(file_name)
+    except TypeError as e:
+        return f"Error GetTaskFileTool '{file_name}' : {str(e)}"
+    except Exception as e:
+        return f"Error reading file: {e}"
 # Call Agent Async
 async def call_agent_async(query: str, runner, user_id, session_id):
   """Sends a query to the agent and prints the final response."""
 1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
 2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
 3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
+4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use different tool prompts).
+5.  **Synthesize Answer:** Combine information. Use `coding_agent` for final formatting/calculations.
 6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."
 Constraints:
                 understand_youtube_video,
                 understand_image,
                 transcribe_audio,
+                excel_to_csv,
+                GetTaskFileTool,
+                LoadTextFileTool,
             ]
         )
     except Exception as e:
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
+        if task_id:
+            question_text += " task_id = " + task_id
+        if file_name:
+            question_text += " file_name = " + file_name
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
                 app_name=APP_NAME,   # Associates runs with our app
                 session_service=session_service # Uses our session manager
             )
+            submitted_answer = await call_agent_async(question_text,
                                        runner=runner,
                                        user_id=USER_ID,
+                                       session_id=SESSION_ID
+                                       )
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+        if os.path.exists(file_name):
+            os.remove(file_name)
     if not answers_payload:
         print("Agent did not produce any answers to submit.")

app.py CHANGED Viewed

@@ -104,7 +104,9 @@ def understand_youtube_video(video_url: str, question: str) -> str:
         )
         print("--- Gemini Response Received ---")
         if hasattr(response, 'text'):
             return response.text
         elif response.parts:
              return "".join(part.text for part in response.parts if hasattr(part, 'text'))
@@ -124,41 +126,39 @@ def understand_image(image_file_name: str) -> str:
     Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.
     Args:
-        image_file_name (str): The file name of the image to analyze. Which given as "file_name" parameter in the question.
     Returns:
         str: The response text generated by the Gemini model.
     """
-    image_url = os.path.join("./GAIA_resource/" , image_file_name)
     print("--- Analyzing Image ---")
-    print(f"Image URL/Path: {image_url}")
     prompt = """
         Analyze the image in detail and describe its contents in as much detail as possible.
         For example, give someone a chess board and describe where each piece is.
-The description should include the following information:
-- General overview of the image
-- Details of important elements and features (e.g., location relationships, attributes, etc.)
-- Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)
-# Steps
-1. Examine the image as a whole and identify the main elements.
-2. Examine each element in detail and identify what it is.
-3. Develop a description of each element based on its characteristic relationships and positions.
-4. Finally, summarize the overall scene or situation.
-# Output Format
-Provide detailed descriptions in paragraphs of text, using bullet points where necessary.
     """
     try:
         # Fetch the image data
-        if image_url.startswith("http"):
-            image_bytes = requests.get(image_url).content
         else:
-            with open(image_url, "rb") as f:
                 image_bytes = f.read()
         # Create image part
@@ -179,8 +179,10 @@ Provide detailed descriptions in paragraphs of text, using bullet points where n
         )
         print("--- Gemini Response Received ---")
         # Extract text from the response
         if hasattr(response, 'text'):
             return response.text
         elif getattr(response, 'parts', None):
             return "".join(part.text for part in response.parts if hasattr(part, 'text'))
@@ -191,7 +193,7 @@ Provide detailed descriptions in paragraphs of text, using bullet points where n
             return f"Model did not return text content.{block_reason}"
     except Exception as e:
-        print(f"Error processing image '{image_url}' with Gemini: {e}")
         return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
 # Audio Tool
@@ -207,7 +209,6 @@ def transcribe_audio(audio_path: str) -> str:
     """
     print("--- Transcribing Audio ---")
     print(f"Audio Path: {audio_path}")
-    audio_path = os.path.join("./GAIA_resource/", audio_path)
     try:
         # Initialize Gemini client
@@ -231,6 +232,8 @@ def transcribe_audio(audio_path: str) -> str:
         else:
             transcript = "Model did not return text content."
         # Format as Markdown
         markdown_transcript = (
             "## Audio Transcription Result\n"
@@ -260,14 +263,7 @@ def excel_to_csv(excel_path: str) -> str:
     excel_path = os.path.join("./GAIA_resource/", excel_path)
     try:
-        # Load workbook from URL or local file
-        if excel_path.startswith("http"):
-            response = requests.get(excel_path)
-            response.raise_for_status()
-            data_stream = BytesIO(response.content)
-            wb = load_workbook(filename=data_stream, data_only=True)
-        else:
-            wb = load_workbook(filename=excel_path, data_only=True)
         # Select worksheet
         ws = wb.active
@@ -320,37 +316,54 @@ data_analyzer_agent = LlmAgent(
 )
-# Read file ascii
-def read_file_ascii(file_path: str) -> str:
     """
-    Given a file URL or local file path, reads the file content and returns it as an ASCII string.
     Args:
-        file_path (str): The URL or local file path of the file to read.
     Returns:
-        str: The ASCII-decoded content of the file, or an error message on failure.
     """
     print("File Path : ", file_path)
-    file_path = os.path.join("./GAIA_resource/", file_path)
     try:
-        # Load data from URL or local file
-        if file_path.startswith("http"):
-            response = requests.get(file_path)
-            response.raise_for_status()
-            data_bytes = response.content
-        else:
-            with open(file_path, "rb") as f:
-                data_bytes = f.read()
         # Decode bytes to ASCII string, replacing errors
-        ascii_str = data_bytes.decode("ascii", errors="replace")
-        return ascii_str
     except Exception as e:
-        return f"Error reading file as ASCII: {e}"
 # Call Agent Async
 async def call_agent_async(query: str, runner, user_id, session_id):
@@ -382,36 +395,8 @@ async def call_agent_async(query: str, runner, user_id, session_id):
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# for GAIA Repo
-GAIA_REPO_ID        = "gaia-benchmark/GAIA"
-GAIA_VALIDATION_DIR = "2023/validation"
-LOCAL_GAIA_DIR      = "GAIA_resource"
-# --- GAIA Data Download Utility ---
-def download_gaia_validation(local_dir: str = LOCAL_GAIA_DIR):
-    """
-        Download only the validation part of the Hugging Face GAIA dataset to
-        local_dir/2023/validation/.
-        If it has already been downloaded, it will not be downloaded again.
-    """
-    target_path = os.path.join(local_dir, GAIA_VALIDATION_DIR)
-    if os.path.isdir(target_path) and os.listdir(target_path):
-        print(f"GAIA validation data already exists at {target_path}")
-        return
-    os.makedirs(local_dir, exist_ok=True)
-    print(f"Downloading GAIA validation data into {local_dir} ...")
-    snapshot_download(
-        repo_id=GAIA_REPO_ID,
-        repo_type="dataset",
-        allow_patterns=[f"{GAIA_VALIDATION_DIR}/*"],
-        local_dir=local_dir,
-        local_dir_use_symlinks=False,
-        use_auth_token=True
-    )
-    print(f"Downloaded GAIA validation data to {target_path}")
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 #class BasicAgent:
@@ -435,8 +420,8 @@ Thinking Process:
 1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
 2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
 3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
-4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use `web_search`, different tool prompts).
-5.  **Synthesize Answer:** Combine information. Use `execute_python_code` for final formatting/calculations.
 6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."
 Constraints:
@@ -444,7 +429,6 @@ Constraints:
 - Adhere strictly to the requested output format.
 """
 async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -486,8 +470,9 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
                 understand_youtube_video,
                 understand_image,
                 transcribe_audio,
-                agent_tool.AgentTool(agent=data_analyzer_agent),
-                read_file_ascii,
             ]
         )
     except Exception as e:
@@ -525,8 +510,11 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        question_file_name = item.get("file_name")
-        question_all = question_text + " file_name = " + question_file_name
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
@@ -547,7 +535,7 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
                 app_name=APP_NAME,   # Associates runs with our app
                 session_service=session_service # Uses our session manager
             )
-            submitted_answer = await call_agent_async(question_all,
                                        runner=runner,
                                        user_id=USER_ID,
                                        session_id=SESSION_ID)

         )
         print("--- Gemini Response Received ---")
         if hasattr(response, 'text'):
+            print("Video Description : ", response.text)
             return response.text
         elif response.parts:
              return "".join(part.text for part in response.parts if hasattr(part, 'text'))
     Given an image file , this will analyze the image in detail and describe its contents in as much detail as possible.
     Args:
+        image_file_name (str): The file name of the image to analyze.
     Returns:
         str: The response text generated by the Gemini model.
     """
     print("--- Analyzing Image ---")
+    print(f"Image URL/Path: {image_file_name}")
     prompt = """
         Analyze the image in detail and describe its contents in as much detail as possible.
         For example, give someone a chess board and describe where each piece is.
+        The description should include the following information:
+        - General overview of the image
+        - Details of important elements and features (e.g., location relationships, attributes, etc.)
+        - Identification of specific objects or characters (e.g., game piece names, positions, people, etc.)
+        # Steps
+        1. Examine the image as a whole and identify the main elements.
+        2. Examine each element in detail and identify what it is.
+        3. Develop a description of each element based on its characteristic relationships and positions.
+        4. Finally, summarize the overall scene or situation.
+        # Output Format
+        Provide detailed descriptions in paragraphs of text, using bullet points where necessary.
     """
     try:
         # Fetch the image data
+        if image_file_name.startswith("http"):
+            image_bytes = requests.get(image_file_name).content
         else:
+            with open(image_file_name, "rb") as f:
                 image_bytes = f.read()
         # Create image part
         )
         print("--- Gemini Response Received ---")
         # Extract text from the response
         if hasattr(response, 'text'):
+            print("Image Description : ", response.text)
             return response.text
         elif getattr(response, 'parts', None):
             return "".join(part.text for part in response.parts if hasattr(part, 'text'))
             return f"Model did not return text content.{block_reason}"
     except Exception as e:
+        print(f"Error processing image '{image_file_name}' with Gemini: {e}")
         return f"Sorry, an error occurred while analyzing the image. Please check the image URL or path. Error details: {str(e)}"
 # Audio Tool
     """
     print("--- Transcribing Audio ---")
     print(f"Audio Path: {audio_path}")
     try:
         # Initialize Gemini client
         else:
             transcript = "Model did not return text content."
+        print("Transcript : ", transcript)
         # Format as Markdown
         markdown_transcript = (
             "## Audio Transcription Result\n"
     excel_path = os.path.join("./GAIA_resource/", excel_path)
     try:
+        wb = load_workbook(filename=excel_path, data_only=True)
         # Select worksheet
         ws = wb.active
 )
+# Read text file
def LoadTextFileTool(file_path: str) -> str:
    """
    Load a text file and return its contents as a string.

    The file is decoded as UTF-8. Bytes that are not valid UTF-8 are replaced
    with U+FFFD instead of aborting the read, so a file saved in a legacy
    encoding still yields usable text.

    Args:
        file_path (str): Path of the text file to read.
    Returns:
        str: Text file contents, or a message starting with
            "Error reading text file:" if the file could not be read.
    """
    print("---Load Text File Tool---")
    print("File Path : ", file_path)
    try:
        # Decode as UTF-8, replacing undecodable bytes rather than failing on them.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            return file.read()
    except Exception as e:
        # Tool boundary: the failure is reported as text instead of being raised,
        # so the caller always receives a string.
        return f"Error reading text file: {e}"
+# Get task file
def GetTaskFileTool(file_name: str, task_id: str) -> str:
    """
    Download the file associated with the given task_id and save it locally.

    The content is fetched from ``{DEFAULT_API_URL}/files/{task_id}`` and
    written in the current working directory under the final path component
    of ``file_name``. Directory parts of ``file_name`` are discarded so the
    caller cannot make the tool write outside the working directory.

    Args:
        file_name (str): File name to save the download as.
        task_id (str): Task id whose attached file should be downloaded.
    Returns:
        str: Absolute path of the saved file, or a message starting with
            "Error" if the arguments are invalid or the download/write failed.
    """
    # Function-scope import: the file's import block is not visible from this
    # block, so the new dependency is brought into scope here.
    from urllib.parse import quote

    print("---Get Task File Tool---")
    print("File Name : ", file_name)

    # file_name is supplied by the caller (ultimately the question payload /
    # model output); keep only the last path component to block traversal such
    # as "../app.py" or absolute paths.
    safe_name = os.path.basename(file_name) if isinstance(file_name, str) else ""
    if safe_name in ("", ".", ".."):
        return f"Error GetTaskFileTool '{file_name}' : invalid file name"
    if not task_id:
        # Fail fast instead of issuing a request that cannot succeed.
        return f"Error GetTaskFileTool '{file_name}' : missing task_id"

    try:
        # Percent-encode the id so characters like "/" or "?" cannot alter the URL.
        # NOTE(review): DEFAULT_API_URL and requests are expected at module
        # level; neither definition is visible alongside this function - confirm.
        url = f"{DEFAULT_API_URL}/files/{quote(str(task_id), safe='')}"
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        with open(safe_name, 'wb') as file:
            file.write(response.content)
        return os.path.abspath(safe_name)
    except TypeError as e:
        return f"Error GetTaskFileTool '{file_name}' : {str(e)}"
    except Exception as e:
        # Tool boundary: HTTP, network, and filesystem failures are reported as
        # text rather than raised.
        return f"Error reading file: {e}"
 # Call Agent Async
 async def call_agent_async(query: str, runner, user_id, session_id):
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 #class BasicAgent:
 1.  **Analyze Question & Identify Files:** Carefully read the question. Determine the core task and the **exact final answer format**. Check if the question explicitly mentions an attached file (image, Excel, audio, code).
 2.  **Identify Filename:** If a file is mentioned, identify its filename from the text (e.g., "Homework.mp3", "image.png"). If no specific filename is given for a required file type, state that you need the filename. **Do not guess filenames.**
 3.  **Plan:** Create a step-by-step plan using tools. If a file is needed, include the correct tool call with the identified filename.
+4.  **Execute & Refine:** Execute the plan. Pass correct arguments (especially filenames). Evaluate tool outputs. If errors occur (e.g., file not found, API errors) or info is insufficient, revise the plan (e.g., use different tool prompts).
+5.  **Synthesize Answer:** Combine information. Use `coding_agent` for final formatting/calculations.
 6.  **Final Output:** Generate **only the final answer** in the requested format. No extra text. If the answer cannot be found or a required filename was missing/invalid, output: "I could not find the answer."
 Constraints:
 - Adhere strictly to the requested output format.
 """
 async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
                 understand_youtube_video,
                 understand_image,
                 transcribe_audio,
+                excel_to_csv,
+                GetTaskFileTool,
+                LoadTextFileTool,
             ]
         )
     except Exception as e:
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
+        if task_id:
+            question_text += " task_id = " + task_id
+        if file_name:
+            question_text += " file_name = " + file_name
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
                 app_name=APP_NAME,   # Associates runs with our app
                 session_service=session_service # Uses our session manager
             )
+            submitted_answer = await call_agent_async(question_text,
                                        runner=runner,
                                        user_id=USER_ID,
                                        session_id=SESSION_ID)