drAbreu committed
Commit f042db0 · 1 Parent(s): adf2ae1

Added cache to the answers

Files changed (2):
  1. README.md +4 -1
  2. app.py +233 -57
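
In short, the new `ResponseCache` in app.py keys answers by an MD5 hash of the question text, persists them to `agent_cache.json`, and only reuses entries previously marked correct. A minimal sketch of that lookup flow, with `run_agent` standing in for the `BasicAgent` call (the helper names here are illustrative, not the committed API):

```python
# Condensed sketch of the cache lookup added in app.py; run_agent stands in for BasicAgent.
import hashlib
import json
import os

CACHE_FILE = "agent_cache.json"

def load_cache():
    # The committed ResponseCache also tolerates a corrupt file; this sketch keeps it simple.
    return json.load(open(CACHE_FILE)) if os.path.exists(CACHE_FILE) else {}

def answer_with_cache(question, run_agent):
    cache = load_cache()
    key = hashlib.md5(question.encode("utf-8")).hexdigest()
    entry = cache.get(key)
    if entry and entry.get("is_correct"):
        return entry["answer"]      # cache hit: reuse an answer already marked correct
    return run_agent(question)      # cache miss: fall back to the agent
```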
README.md CHANGED
@@ -20,5 +20,8 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 
 ## Tag 1.1.0
 
-- Adding web and wikipedia tools to single agent obtains 5 / 20 correct answers using claude 3.7
+- Adding web and wikipedia tools to single agent obtains 5 / 20 correct answers using claude 3.7 and gpt-4o
 
+## Tag 1.2.0
+
+- Adding a `writer_agent` obtains 7 / 20 correct answers using claude 3.7 for research and gpt-4o for writing the answers
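
The Tag 1.2.0 note describes a two-step pipeline: a research agent (claude 3.7) gathers evidence and a writer agent (gpt-4o) formats the final answer. A hypothetical sketch of that hand-off; `call_claude`, `call_gpt4o`, and the prompts are placeholders for illustration, not the repo's `create_writer_agent`:

```python
# Hypothetical illustration of the research -> writer hand-off described in Tag 1.2.0.
def answer_question(question: str, call_claude, call_gpt4o) -> str:
    # Step 1: the research agent collects notes and candidate facts.
    notes = call_claude(
        f"Research the following question and list the key facts with sources:\n{question}"
    )
    # Step 2: the writer agent turns the notes into a short, exact answer.
    return call_gpt4o(
        "Using only these notes, answer the question as concisely as possible.\n"
        f"Question: {question}\nNotes:\n{notes}"
    )
```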
app.py CHANGED
@@ -6,6 +6,9 @@ import pandas as pd
 import asyncio
 from llama_index.core.agent.workflow import AgentWorkflow
 from agents.llama_index_agent import GaiaAgent, create_writer_agent
+import json
+import hashlib
+from pathlib import Path
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -114,16 +117,104 @@ class BasicAgent:
         return final_answer
 
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+class ResponseCache:
+    """Cache manager for storing and retrieving agent responses."""
+
+    def __init__(self, cache_file="agent_cache.json"):
+        """Initialize the cache manager.
+
+        Args:
+            cache_file: Path to the JSON file for storing the cache
+        """
+        self.cache_file = cache_file
+        self.cache = self._load_cache()
+
+        # Stats for the current session
+        self.cache_hits = 0
+        self.cache_misses = 0
+
+    def _load_cache(self):
+        """Load the cache from disk."""
+        try:
+            if os.path.exists(self.cache_file):
+                with open(self.cache_file, 'r') as f:
+                    return json.load(f)
+            return {}
+        except Exception as e:
+            print(f"Error loading cache: {e}. Starting with empty cache.")
+            return {}
+
+    def _save_cache(self):
+        """Save the cache to disk."""
+        try:
+            with open(self.cache_file, 'w') as f:
+                json.dump(self.cache, f)
+        except Exception as e:
+            print(f"Error saving cache: {e}")
+
+    def get_hash(self, question):
+        """Create a consistent hash for a question."""
+        return hashlib.md5(question.encode('utf-8')).hexdigest()
+
+    def get(self, question):
+        """Get a cached response if available.
+
+        Returns:
+            tuple: (cached_answer, hit_status)
+                - cached_answer: The cached answer or None if not found
+                - hit_status: True if cache hit, False if miss
+        """
+        question_hash = self.get_hash(question)
+        if question_hash in self.cache:
+            # Only return answers marked as correct
+            entry = self.cache[question_hash]
+            if entry.get("is_correct", False):
+                self.cache_hits += 1
+                return entry["answer"], True
+
+        self.cache_misses += 1
+        return None, False
+
+    def update(self, question, answer, is_correct=False):
+        """Update the cache with a new response.
+
+        Args:
+            question: The question text
+            answer: The agent's answer
+            is_correct: Whether the answer was correct
+        """
+        question_hash = self.get_hash(question)
+        self.cache[question_hash] = {
+            "question": question,
+            "answer": answer,
+            "is_correct": is_correct
+        }
+        self._save_cache()
+
+    def get_stats(self):
+        """Get cache statistics."""
+        total_entries = len(self.cache)
+        correct_entries = sum(1 for entry in self.cache.values() if entry.get("is_correct", False))
+
+        return {
+            "total_cached": total_entries,
+            "correct_cached": correct_entries,
+            "session_hits": self.cache_hits,
+            "session_misses": self.cache_misses
+        }
+
+
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
+    and displays the results. Uses caching to avoid re-processing questions
+    with known correct answers.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -132,17 +223,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+
+    # Initialize the cache
+    cache = ResponseCache()
+    print(f"Cache loaded. Stats: {cache.get_stats()}")
 
-    # 1. Instantiate Agent ( modify this part to create your agent)
-    try:
-        agent = BasicAgent()
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
+    # 1. Instantiate Agent (only if needed)
+    agent = None  # We'll lazily initialize the agent only if needed
 
+    # In the case of an app running as a Hugging Face Space, this link points toward your codebase
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
@@ -164,72 +255,144 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
 
-    # 3. Run your Agent
+    # 3. Run your Agent (with cache)
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
+    cache_usage = {"hits": 0, "misses": 0}
+
+    print(f"Processing {len(questions_data)} questions...")
+
     for item in questions_data:
         task_id = item.get("task_id")
        question_text = item.get("question")
+
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-        try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+        # Try to get the answer from cache
+        cached_answer, is_cache_hit = cache.get(question_text)
+
+        if is_cache_hit:
+            # Use cached answer
+            submitted_answer = cached_answer
+            cache_usage["hits"] += 1
+            print(f"✅ Cache hit for task {task_id}. Using cached answer.")
+        else:
+            # Cache miss - run the agent
+            cache_usage["misses"] += 1
+            print(f"🔄 Cache miss for task {task_id}. Running agent...")
+
+            # Lazy initialization of agent
+            if agent is None:
+                try:
+                    print("Initializing agent...")
+                    agent = BasicAgent()
+                except Exception as e:
+                    print(f"Error instantiating agent: {e}")
+                    return f"Error initializing agent: {e}", None
+
+            try:
+                submitted_answer = agent(question_text)
+            except Exception as e:
+                print(f"Error running agent on task {task_id}: {e}")
+                submitted_answer = f"AGENT ERROR: {e}"
+
+        # Add to results and submission payload
+        answers_payload.append({
+            "task_id": task_id,
+            "submitted_answer": submitted_answer
+        })
+
+        results_log.append({
+            "Task ID": task_id,
+            "Question": question_text,
+            "Submitted Answer": submitted_answer,
+            "From Cache": is_cache_hit
+        })
 
     if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+        print("No answers to submit.")
+        return "No answers to submit.", pd.DataFrame(results_log)
 
     # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
+
+    status_update = (
+        f"Finished processing questions. "
+        f"Cache: {cache_usage['hits']} hits, {cache_usage['misses']} misses. "
+        f"Submitting {len(answers_payload)} answers for user '{username}'..."
+    )
     print(status_update)
 
-    # 5. Submit
+    # 5. Submit and update cache with results
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
+
+        # Update cache with correct answers
+        if "task_results" in result_data:
+            cache_updates = 0
+            for task_result in result_data["task_results"]:
+                task_id = task_result.get("task_id")
+                is_correct = task_result.get("is_correct", False)
+
+                # Find the matching question and answer
+                for item in questions_data:
+                    if item.get("task_id") == task_id:
+                        question = item.get("question")
+
+                        # Find the matching submitted answer
+                        for answer_item in answers_payload:
+                            if answer_item.get("task_id") == task_id:
+                                answer = answer_item.get("submitted_answer")
+
+                                # Only cache correct answers
+                                if is_correct:
+                                    cache.update(question, answer, is_correct=True)
+                                    cache_updates += 1
+                                break
+
+            print(f"Updated cache with {cache_updates} correct answers.")
+
+        # Prepare final status message
+        cache_stats = cache.get_stats()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"Cache Performance: {cache_usage['hits']} hits, {cache_usage['misses']} misses\n"
+            f"Total Cached Correct Answers: {cache_stats['correct_cached']}\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
+
+        # Add cache information to results dataframe
         results_df = pd.DataFrame(results_log)
+
+        # If the response includes detailed results, add correctness to the DataFrame
+        if "task_results" in result_data:
+            # Create a mapping of task_id to correctness
+            correctness_map = {
+                result["task_id"]: result["is_correct"]
+                for result in result_data["task_results"]
+            }
+
+            # Add a column for correctness
+            results_df["Is Correct"] = results_df["Task ID"].map(
+                lambda x: correctness_map.get(x, "Unknown")
            )
+
         return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
+
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
+        status_message = f"Submission Failed: {str(e)}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
@@ -237,19 +400,21 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown("# Basic Agent Evaluation Runner (with Caching)")
     gr.Markdown(
         """
        **Instructions:**
 
-        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc...
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
+        **Caching Enabled**: Correct answers are cached between runs to speed up evaluation.
+
         ---
        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+        Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
         """
     )
 
@@ -258,14 +423,24 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
+    # Display current cache status
+    cache = ResponseCache()
+    cache_stats = cache.get_stats()
+
+    gr.Markdown(
+        f"**Cache Status**: {cache_stats['correct_cached']} correct answers cached out of {cache_stats['total_cached']} total entries."
+    )
+
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 
+
+
+# Add these imports to your existing imports
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
@@ -288,4 +463,5 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
+
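
For reference, a minimal sketch of how the `ResponseCache` added above would be exercised on its own (the question and answer here are made up; the file name and stats keys follow the diff):

```python
# Minimal usage sketch of the ResponseCache introduced in this commit.
cache = ResponseCache(cache_file="agent_cache.json")

answer, hit = cache.get("What is 2 + 2?")
if not hit:
    answer = "4"  # would normally come from BasicAgent
    # Entries are only reused on later runs if they were marked correct.
    cache.update("What is 2 + 2?", answer, is_correct=True)

print(cache.get_stats())
# {'total_cached': 1, 'correct_cached': 1, 'session_hits': 0, 'session_misses': 1}
```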