agents_course_final_assignement

Paused

App Files Files Community

drAbreu committed 9 days ago

Commit

cb2e2ec

1 Parent(s): a0bacd9

Improved the interface

Browse files

Files changed (2) hide show

README.md +10 -1
app.py +342 -66

README.md CHANGED Viewed

@@ -12,4 +12,13 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 hf_oauth_expiration_minutes: 480
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Tag 1.0.0
+- Basic agent without any tools obtains 1 / 20 correct answers using claude 3.7
+-
+## Tag 1.1.0
+- Adding web and wikipedia tools to single agent obtains 5 / 20 correct answers using claude 3.7

app.py CHANGED Viewed

@@ -3,15 +3,21 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from agents.llama_index_agent import GaiaAgent
 import asyncio
-# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 CLAUDE = {
     "model_provider": "anthropic",
     "model_name": "claude-3-7-sonnet-latest"
@@ -20,120 +26,362 @@ OPENAI = {
     "model_provider": "openai",
     "model_name": "gpt-4o"
 }
 class BasicAgent:
     def __init__(
             self,
-            model_provider="openai",
-            model_name="gpt-4o",
-            api_key=None
             ):
         """
-        Initialize the BasicAgent with configurable model settings.
         Args:
-            model_provider: LLM provider to use (openai, anthropic, etc.)
             model_name: Specific model to use
-            api_key: Optional API key (defaults to environment variable)
         """
-        self.agent = GaiaAgent(**CLAUDE)
         print(f"BasicAgent initialized with {model_provider} {model_name}.")
     def __call__(self, question: str) -> str:
-        """Process a GAIA benchmark question and return the formatted answer."""
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         async def agentic_main():
-            response = await self.agent.run(question)
-            return response
-        response = asyncio.run(agentic_main())
-        final_answer = response.response.blocks[-1].text
         print(f"Agent returning answer: {final_answer}")
         return final_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-    if profile:
-        username= f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
-    results_log = []
-    answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
-            continue
-        try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
@@ -141,9 +389,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
@@ -172,42 +423,67 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
-    gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -215,7 +491,7 @@ if __name__ == "__main__":
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -224,5 +500,5 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+import json
 import asyncio
+from pathlib import Path
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+from tqdm.asyncio import tqdm as async_tqdm
+from agents.llama_index_agent import GaiaAgent
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+CACHE_DIR = "cache"
+CACHE_FILE = os.path.join(CACHE_DIR, "agent_cache.json")
+MAX_CONCURRENT_REQUESTS = 3  # Limit concurrent API calls
+# Model configurations
 CLAUDE = {
     "model_provider": "anthropic",
     "model_name": "claude-3-7-sonnet-latest"
     "model_provider": "openai",
     "model_name": "gpt-4o"
 }
+# --- Optimized Agent Implementation ---
+class OptimizedGaiaAgent:
+    """
+    Enhanced GAIA agent with caching and asynchronous processing capabilities.
+    """
+    def __init__(
+            self,
+            model_config=CLAUDE,
+            use_cache=True,
+            cache_file=CACHE_FILE,
+            max_concurrent=MAX_CONCURRENT_REQUESTS
+            ):
+        """
+        Initialize the optimized agent.
+        Args:
+            model_config: Dictionary with model_provider and model_name
+            use_cache: Whether to use caching
+            cache_file: Path to the cache file
+            max_concurrent: Maximum number of concurrent requests
+        """
+        self.agent = GaiaAgent(**model_config)
+        self.model_config = model_config
+        self.use_cache = use_cache
+        self.cache_file = cache_file
+        self.cache = self._load_cache() if use_cache else {}
+        self.semaphore = asyncio.Semaphore(max_concurrent)
+        print(f"OptimizedGaiaAgent initialized with {model_config['model_provider']} {model_config['model_name']}")
+        if use_cache:
+            print(f"Cache loaded with {len(self.cache)} answers")
+    def _load_cache(self) -> Dict[str, str]:
+        """Load cached answers from file"""
+        # Create cache directory if it doesn't exist
+        os.makedirs(os.path.dirname(self.cache_file), exist_ok=True)
+        cache_path = Path(self.cache_file)
+        if cache_path.exists():
+            try:
+                with open(cache_path, 'r') as f:
+                    return json.load(f)
+            except Exception as e:
+                print(f"Error loading cache: {e}")
+                return {}
+        return {}
+    def _save_cache(self) -> None:
+        """Save cached answers to file"""
+        try:
+            with open(self.cache_file, 'w') as f:
+                json.dump(self.cache, f, indent=2)
+        except Exception as e:
+            print(f"Error saving cache: {e}")
+    def _get_cache_key(self, question: str) -> str:
+        """Generate a consistent key for the cache"""
+        # Strip whitespace and normalize
+        return question.strip()
+    async def process_question(self, task_id: str, question: str) -> Dict[str, Any]:
+        """
+        Process a single question, using cache if available.
+        Args:
+            task_id: ID of the task/question
+            question: The question text
+        Returns:
+            Dictionary with task_id, question, answer, and metadata
+        """
+        cache_key = self._get_cache_key(question)
+        # Check cache first
+        if self.use_cache and cache_key in self.cache:
+            print(f"🔄 Cache hit for task {task_id[:8]}...")
+            return {
+                "task_id": task_id,
+                "question": question,
+                "submitted_answer": self.cache[cache_key],
+                "cached": True,
+                "error": False
+            }
+        # Process the question (with semaphore to limit concurrent requests)
+        async with self.semaphore:
+            print(f"⚙️ Processing task {task_id[:8]}...")
+            try:
+                response = await self.agent.run(question)
+                answer = response.response.blocks[-1].text
+                # Save to cache
+                if self.use_cache:
+                    self.cache[cache_key] = answer
+                    # Use asyncio.to_thread for file I/O to avoid blocking
+                    await asyncio.to_thread(self._save_cache)
+                return {
+                    "task_id": task_id,
+                    "question": question,
+                    "submitted_answer": answer,
+                    "cached": False,
+                    "error": False
+                }
+            except Exception as e:
+                error_message = f"ERROR: {str(e)}"
+                print(f"❌ Error processing task {task_id[:8]}: {error_message}")
+                return {
+                    "task_id": task_id,
+                    "question": question,
+                    "submitted_answer": error_message,
+                    "cached": False,
+                    "error": True
+                }
+    async def process_all(
+            self,
+            questions_data: List[Dict[str, Any]],
+            progress_callback=None
+        ) -> List[Dict[str, Any]]:
+        """
+        Process all questions, with progress reporting.
+        Args:
+            questions_data: List of question dictionaries
+            progress_callback: Function to call with progress updates
+        Returns:
+            List of results with answers and metadata
+        """
+        # Filter out invalid questions
+        valid_questions = [
+            item for item in questions_data
+            if item.get("task_id") and item.get("question") is not None
+        ]
+        if not valid_questions:
+            print("No valid questions to process.")
+            return []
+        total = len(valid_questions)
+        print(f"Processing {total} questions with {MAX_CONCURRENT_REQUESTS} concurrent tasks...")
+        # Process questions and collect results
+        results = []
+        # Create async tasks
+        tasks = [
+            self.process_question(item["task_id"], item["question"])
+            for item in valid_questions
+        ]
+        # Process with progress tracking
+        if progress_callback:
+            progress_callback(0, desc="Starting processing...")
+        # Process tasks one by one with progress updates
+        for i, task in enumerate(asyncio.as_completed(tasks)):
+            result = await task
+            results.append(result)
+            # Update progress
+            if progress_callback:
+                progress_callback((i + 1) / total, desc=f"Processed {i + 1}/{total} questions")
+        # Sort results to match original order
+        id_to_result = {result["task_id"]: result for result in results}
+        ordered_results = [
+            id_to_result.get(
+                item["task_id"],
+                {"task_id": item["task_id"], "question": item.get("question"), "submitted_answer": "ERROR: Processing failed", "error": True}
+            )
+            for item in valid_questions
+        ]
+        return ordered_results
+# --- Main Application Class ---
 class BasicAgent:
+    """
+    Wrapper that exposes OptimizedGaiaAgent through the BasicAgent interface.
+    Offers a batch coroutine (process_async) and a synchronous single-question call.
+    """
     def __init__(
             self,
+            model_provider="anthropic",
+            model_name="claude-3-7-sonnet-latest",
+            api_key=None,
+            use_cache=True,
+            max_concurrent=MAX_CONCURRENT_REQUESTS
             ):
         """
+        Initialize the BasicAgent with caching and async capabilities.
         Args:
+            model_provider: LLM provider to use
             model_name: Specific model to use
+            api_key: Optional API key; placed in the model config even when None
+            use_cache: Whether the wrapped agent uses its answer cache
+            max_concurrent: Maximum concurrent requests of the wrapped agent
         """
+        # OptimizedGaiaAgent forwards this dict as keyword arguments to GaiaAgent.
+        # NOTE(review): this requires GaiaAgent to accept an api_key keyword — confirm.
+        model_config = {
+            "model_provider": model_provider,
+            "model_name": model_name,
+            "api_key": api_key
+        }
+        self.agent = OptimizedGaiaAgent(
+            model_config=model_config,
+            use_cache=use_cache,
+            max_concurrent=max_concurrent
+        )
         print(f"BasicAgent initialized with {model_provider} {model_name}.")
+    async def process_async(self, questions_data, progress_callback=None):
+        """Process questions asynchronously by delegating to the wrapped agent's process_all"""
+        return await self.agent.process_all(questions_data, progress_callback)
     def __call__(self, question: str) -> str:
+        """
+        Process a single question (for compatibility with the original interface).
+        This method is synchronous: it drives the coroutine with asyncio.run, so
+        it must not be called from code that is already running in an event loop.
+        """
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         async def agentic_main():
+            # "single" is a placeholder task id; only the answer text is returned.
+            result = await self.agent.process_question("single", question)
+            return result["submitted_answer"]
+        final_answer = asyncio.run(agentic_main())
         print(f"Agent returning answer: {final_answer}")
         return final_answer
+# --- Async Run and Submit Function ---
+async def async_run_and_submit_all(
+        profile: gr.OAuthProfile | None,
+        progress=gr.Progress()
+    ) -> tuple:
     """
+    Asynchronous version of run_and_submit_all.
+    Fetches questions, processes them concurrently, and submits answers.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+    if not profile:
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
+    username = f"{profile.username}"
+    print(f"User logged in: {username}")
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
+        progress(0, desc="Initializing agent...")
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # In the case of an app running as a Hugging Face space, this link points toward your codebase
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
+    progress(0.1, desc="Fetching questions...")
     try:
+        # Use asyncio for the HTTP request
+        async def fetch_questions():
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(
+                None,
+                lambda: requests.get(questions_url, timeout=15)
+            )
+        response = await fetch_questions()
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
+        progress(0.2, desc=f"Successfully fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Process Questions Asynchronously
+    print(f"Processing {len(questions_data)} questions...")
+    try:
+        # Define progress update function
+        def update_progress(value, desc=""):
+            # Scale progress from 0.2-0.8 for the processing phase
+            progress(0.2 + (value * 0.6), desc=desc)
+        results = await agent.process_async(questions_data, update_progress)
+        # Convert results to the expected format
+        answers_payload = [
+            {"task_id": result["task_id"], "submitted_answer": result["submitted_answer"]}
+            for result in results
+        ]
+        # Format for display
+        results_log = [
+            {"Task ID": result["task_id"], "Question": result["question"], "Submitted Answer": result["submitted_answer"]}
+            for result in results
+        ]
+        progress(0.8, desc=f"Processed all {len(results)} questions. Preparing submission...")
+    except Exception as e:
+        print(f"Error during question processing: {e}")
+        return f"Error during question processing: {e}", None
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame([])
     # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
+    progress(0.9, desc="Submitting answers...")
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        async def submit_answers():
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(
+                None,
+                lambda: requests.post(submit_url, json=submission_data, timeout=60)
+            )
+        response = await submit_answers()
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
+        progress(1.0, desc="Complete!")
         results_df = pd.DataFrame(results_log)
         return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
         return status_message, results_df
+# Synchronous wrapper for the async function (for Gradio compatibility)
+def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
+    """Synchronous wrapper for the async function"""
+    return asyncio.run(async_run_and_submit_all(profile, progress))
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Optimized GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, and necessary packages.
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, process them, and see your score.
+        This implementation features:
+        - **Caching**: Answers are saved to avoid reprocessing the same questions
+        - **Asynchronous Processing**: Questions are processed concurrently for better performance
+        - **Progress Tracking**: See real-time progress as questions are processed
         """
     )
+    with gr.Row():
+        gr.LoginButton()
+        clear_cache_button = gr.Button("Clear Cache")
+    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # Define clear cache function
+    def clear_cache():
+        if os.path.exists(CACHE_FILE):
+            try:
+                os.remove(CACHE_FILE)
+                return f"Cache cleared successfully! ({CACHE_FILE})"
+            except Exception as e:
+                return f"Error clearing cache: {e}"
+        return "No cache file found."
+    # Connect the components
+    clear_cache_button.click(
+        fn=clear_cache,
+        outputs=status_output
+    )
     run_button.click(
         fn=run_and_submit_all,
+        inputs=[gr.OAuthProfile()],
         outputs=[status_output, results_table]
     )
+# --- App Entry Point ---
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Optimized Agent Evaluation...")
     demo.launch(debug=True, share=False)