Final_Assignment_Template

Sleeping

App Files Files Community

Datawithsarah committed 8 days ago

Commit

fc7015b

1 Parent(s): fc54712

Update app.py and requirements.txt for GAIA Agent

Browse files

Files changed (2) hide show

app.py +256 -95
requirements.txt +10 -1

app.py CHANGED Viewed

@@ -1,77 +1,186 @@
 import os
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class KeywordAgent:
-    def __init__(self):
-        print("KeywordAgent initialized.")
-    def __call__(self, question: str) -> str:
-        q = question.lower().strip()
-        #  Reversed string question
-        if q.startswith(".rewsna"):
-            return q[::-1].strip().lower()
-        #  Mercedes Sosa album trivia
-        elif "mercedes sosa" in q and "studio albums" in q:
-            return "40"  # numeric, leave as is
-        #  Wikipedia Featured Article
-        elif "featured article" in q and "english wikipedia" in q:
-            return "brianboulton"
-        #  Equine veterinarian
-        elif "equine" in q and "veterinarian" in q:
-            return "ross"
-        #  Grocery list (botanical veg only)
-        elif "grocery list" in q and "vegetables" in q:
-            vegetables = [
-                "acorns", "basil", "bell pepper", "broccoli", "celery", "green beans",
-                "lettuce", "peanuts", "sweet potatoes", "whole allspice", "zucchini"
             ]
-            return ", ".join(sorted(vegetables)).strip().lower()
-        #  Audio file / mp3 fallback
-        elif ".mp3" in q or "voice memo" in q or "recording" in q:
-            return "i don't know"
-        #  YouTube / video-based questions
-        elif "youtube" in q or "video" in q:
-            return "i don't know"
-        #  Chess move or image-based logic
-        elif "chess" in q or "position" in q or "image" in q:
-            return "i don't know"
-        #  Table operation for commutativity
-        elif "set s" in q and "*" in q:
-            return "b, c"
-        #  Fallback
-        else:
-            return "i don't know"
-# --- TEMPORARY LIVE TEST BLOCK FOR KEYWORDAGENT ---
-def test_agent_response(question_text):
-    agent = KeywordAgent()
-    return agent(question_text)
-test_interface = gr.Interface(
-    fn=test_agent_response,
-    inputs=gr.Textbox(label="Enter a Question to Test", placeholder="e.g., What is 2 + 2?"),
-    outputs=gr.Textbox(label="Agent's Answer"),
-    title="🔍 Agent Logic Tester",
-    description="Use this to quickly test how the KeywordAgent responds to custom questions."
 )
 def run_and_submit_all( profile: gr.OAuthProfile | None):
@@ -79,7 +188,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
-    # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
@@ -93,13 +202,18 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = KeywordAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -131,11 +245,41 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -197,36 +341,53 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 GAIA Final Assignment: Agent Runner")
-    with gr.Tab("🔍 Test Your Agent"):
-        gr.Markdown("Use this to test how your agent responds to custom questions before running full evaluation.")
-        test_input = gr.Textbox(label="Enter a Question", placeholder="e.g., How many studio albums...")
-        test_output = gr.Textbox(label="Agent's Answer", interactive=False)
-        test_button = gr.Button("Test Agent")
-        test_button.click(fn=test_agent_response, inputs=test_input, outputs=test_output)
-    with gr.Tab("📤 Run Evaluation & Submit"):
-        gr.Markdown(
-            """
-            **Instructions:**
-            1. Modify your agent logic.
-            2. Log in to Hugging Face below.
-            3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and see your score.
-            ---
-            """
-        )
-        gr.LoginButton()
-        run_button = gr.Button("Run Evaluation & Submit All Answers")
-        status_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)
-        results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-        run_button.click(
-            fn=run_and_submit_all,
-            outputs=[status_output, results_table]
-        )
 if __name__ == "__main__":
-    demo.launch(debug=True)

 import os
 import gradio as gr
 import pandas as pd
+import requests
+import subprocess
+import json
+import csv
+import openpyxl
+import whisper
+from typing import Optional
+from bs4 import BeautifulSoup
+from duckduckgo_search import DDGS
+from smolagents import CodeAgent, BaseModel, tool
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
+# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+class ClaudeServerModel(BaseModel):
+    def __init__(self, api_key: str, model_id: str = "claude-3-opus-20240229", temperature: float = 0.0):
+        self.api_key = api_key
+        self.model_id = model_id
+        self.temperature = temperature
+    def complete(self, prompt: str) -> str:
+        headers = {
+            "x-api-key": self.api_key,
+            "anthropic-version": "2023-06-01",
+            "content-type": "application/json"
+        }
+        body = {
+            "model": self.model_id,
+            "max_tokens": 1024,
+            "temperature": self.temperature,
+            "messages": [
+                {"role": "user", "content": prompt}
             ]
+        }
+        response = requests.post("https://api.anthropic.com/v1/messages", headers=headers, json=body)
+        response.raise_for_status()
+        return response.json()["content"][0]["text"].strip()
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+def download_file(file_name: str) -> None:
+    if not os.path.exists(file_name):
+        url = f"{DEFAULT_API_URL}/files/{file_name.split('.')[0]}"
+        r = requests.get(url)
+        with open(file_name, "wb") as f:
+            f.write(r.content)
+@tool
+def open_file_as_text(file_name: str, filetype: Optional[str] = "txt") -> str:
+    download_file(file_name)
+    try:
+        if filetype == "txt":
+            with open(file_name, "r", encoding="utf-8") as f:
+                return f.read()
+        elif filetype == "json":
+            with open(file_name, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            return json.dumps(data, indent=2)
+        elif filetype == "csv":
+            with open(file_name, "r", encoding="utf-8") as f:
+                reader = csv.reader(f)
+                rows = list(reader)
+            return "\n".join([", ".join(row) for row in rows])
+        elif filetype == "xlsx":
+            wb = openpyxl.load_workbook(file_name, data_only=True)
+            sheet = wb.active
+            content = []
+            for row in sheet.iter_rows(values_only=True):
+                content.append(", ".join(str(cell) if cell is not None else "" for cell in row))
+            return "\n".join(content)
+        elif filetype == "mp3":
+            w = whisper.load_model("base")
+            res = w.transcribe(file_name)
+            return res["text"]
+        else:
+            return f"Unsupported filetype '{filetype}'."
+    except Exception as e:
+        return f"Error opening file '{file_name}': {str(e)}"
+@tool
+def web_search(query: str) -> str:
+    try:
+        with DDGS() as ddgs:
+            results = ddgs.text(query, max_results=3)
+            if not results:
+                return "No results found."
+            return "\n\n".join([f"Title: {r['title']}\nSnippet: {r['body']}\nURL: {r['href']}" for r in results])
+    except Exception as e:
+        return f"Error during search: {str(e)}"
+def parse_wikipedia_table(table) -> str:
+    rows = []
+    headers = []
+    thead = table.find('thead')
+    if thead:
+        for th in thead.find_all('th'):
+            headers.append(th.get_text(separator=" ", strip=True))
+        if headers:
+            rows.append(" | ".join(headers))
+    tbody = table.find('tbody') or table
+    for tr in tbody.find_all('tr'):
+        cells = tr.find_all(['th', 'td'])
+        cell_texts = [cell.get_text(separator=" ", strip=True) for cell in cells if cell]
+        if cell_texts:
+            rows.append(" | ".join(cell_texts))
+    return "\n".join(rows)
+@tool
+def read_wikipedia_page(url: str) -> str:
+    headers = {"User-Agent": "Mozilla/5.0"}
+    resp = requests.get(url, headers=headers, timeout=10)
+    resp.raise_for_status()
+    soup = BeautifulSoup(resp.text, "html.parser")
+    content_div = soup.find('div', id='mw-content-text')
+    parts = []
+    for elem in content_div.find_all(['h2', 'h3', 'p', 'ul', 'ol', 'table']):
+        if elem.name in ['h2', 'h3']:
+            parts.append("\n\n" + elem.get_text(strip=True) + "\n")
+        elif elem.name in ['p', 'ul', 'ol']:
+            parts.append(elem.get_text(strip=True))
+        elif elem.name == 'table':
+            parts.append(parse_wikipedia_table(elem))
+    return "\n".join(parts)
+@tool
+def smart_paginate_around_query(full_text: str, query: str) -> list:
+    before_chars = 1000
+    after_chars = 3000
+    q = query.lower()
+    text_lower = full_text.lower()
+    pages = []
+    start = 0
+    while True:
+        idx = text_lower.find(q, start)
+        if idx == -1:
+            break
+        s = max(0, idx - before_chars)
+        e = min(len(full_text), idx + len(q) + after_chars)
+        pages.append(full_text[s:e])
+        start = e
+    return pages
+@tool
+def reverse_sentence(text: str) -> str:
+    return text[::-1]
+@tool
+def run_python_code(file_name: str) -> str:
+    download_file(file_name)
+    try:
+        result = subprocess.run(["python", file_name], capture_output=True, text=True, timeout=10)
+        if result.returncode != 0:
+            return f"Error: {result.stderr.strip()}"
+        return result.stdout.strip()
+    except Exception as e:
+        return f"Execution failed: {e}"
+# Agent Setup
+tools = [  # tool functions handed to every CodeAgent built in this file
+    open_file_as_text,
+    web_search,
+    read_wikipedia_page,
+    smart_paginate_around_query,
+    reverse_sentence,
+    run_python_code
+]
+model = ClaudeServerModel(  # shared model client, created once at import time
+    api_key=os.getenv("CLAUDE_API_KEY"),  # os.getenv yields None when the variable is unset
+    model_id="claude-3-opus-20240229"
+)
+agent = CodeAgent(  # NOTE(review): run_and_submit_all builds its own CodeAgent under the same name - confirm this module-level one is still needed
+    model=model,
+    tools=tools,
+    additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv", "urllib"]
 )
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
+    # Determine HF Space Runtime URL and Repo URL
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # Instantiate Agent ( modify this part to create your agent)
     try:
+        agent = CodeAgent(
+            model=model,
+            tools=tools,
+            additional_authorized_imports=["pandas", "numpy", "datetime", "json", "re", "math", "os", "requests", "csv",
+                                           "urllib"]
+        )
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # In the case of an app running as a hugging Face space, this link points toward your codebase (useful for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            full_prompt = f"""You are a highly precise answering agent designed to meet the GAIA benchmark's exact-match standards.
+When presented with a question:
+- Use tools appropriately and deliberately. Do not make assumptions or guess answers.
+- Use `web_search` to find external sources only if necessary. If the results include short snippets, you MUST follow the link and read the full content using `read_wikipedia_page`.
+- You have access to `read_wikipedia_page` ONLY — no other external browsing is allowed.
+- When reading long text, ALWAYS use `smart_paginate_around_query` to extract focused context. Use 1-3 general keywords (not full questions) as the query.
+- If the task involves reversing words, letters, or phrases, use the `reverse_sentence` tool. Never reverse text manually.
+- For any file-based task (e.g., .mp3, .csv, .json, .xlsx), use the `file_name` provided in the metadata — not a name mentioned in the question text.
+- Format lists with a single space after each comma.
+- If asked for a number, return digits only — no commas, currency signs, or symbols (e.g., %, $, etc.).
+- If asked for a string, do not include articles (e.g., "the", "a") or abbreviations unless required. Spell out numbers in digit form unless stated otherwise.
+- If asked for a comma-separated list, apply the correct formatting per element type (string or number).
+Once you have the exact answer:
+- Immediately call `final_answer("your_answer")` and stop execution.
+- Never retry, rerun, or generate multiple answers.
+- Do not include reasoning, steps, thoughts, or commentary — just the final value.
+Example:
+If asked: "What is the capital of France?"
+Your answer logic should follow:
+```py
+print("Paris")
+```<end_code>
+Based on the above guidelines, answer the following question:
+--begin of question--
+{question_text}
+--end of question--
+If the questions mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
+file_name: {file_name}"""
+            submitted_answer = agent.run(full_prompt)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:  # single-page UI: instructions, login, run button, result widgets
+    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        ---
+        **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+        """
+    )
+    gr.LoginButton()
+    run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)  # read-only status text
+    # Removed max_rows=10 from DataFrame constructor
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    run_button.click(  # the handler's two outputs fill the status box and the results table, in that order
+        fn=run_and_submit_all,
+        outputs=[status_output, results_table]
+    )
 if __name__ == "__main__":  # runs only when the file is executed as a script
+    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    # Check for SPACE_HOST and SPACE_ID at startup for information
+    space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    if space_host_startup:
+        print(f"✅ SPACE_HOST found: {space_host_startup}")
+        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")  # NOTE(review): if SPACE_HOST already ends in .hf.space the suffix is printed twice - confirm
+    else:
+        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup: # Print repo URLs if SPACE_ID is found
+        print(f"✅ SPACE_ID found: {space_id_startup}")
+        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+    else:
+        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+    print("-"*(60 + len(" App Starting ")) + "\n")  # closing rule, same width as the opening banner line
+    print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,2 +1,11 @@
 gradio
-requests

 gradio
+smolagents
+pandas
+requests
+beautifulsoup4
+duckduckgo-search
+openpyxl
+whisper
+torch
+ffmpeg-python
+python-dotenv