drAbreu committed on
Commit f289100 · 1 Parent(s): e67eadd

Changed writer to gpt-4o-mini. Back to non-cached due to issues.

Files changed (2)
  1. agents/llama_index_agent.py +9 -0
  2. app.py +56 -257
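The writer-model switch named in the commit message is not itself visible in the hunks below (they only add prompt text), so here is a rough, hypothetical sketch of what passing gpt-4o-mini through `create_writer_agent`'s `model_config` could look like; the dict keys are assumptions, not code from this commit.

```python
# Hypothetical sketch only: the commit message says the writer now uses
# gpt-4o-mini. The config keys below are assumptions, not repo code.
from agents.llama_index_agent import create_writer_agent

writer_config = {
    "model": "gpt-4o-mini",  # writer model named in the commit message
    "temperature": 0.0,      # assumed: keep the answer-only output deterministic
}

writer = create_writer_agent(writer_config)
```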
agents/llama_index_agent.py CHANGED
@@ -260,8 +260,17 @@ def create_writer_agent(model_config: Dict[str, Any]) -> ReActAgent:
     When asked for "comma-separated list in alphabetical order": apple, banana, cherry
     When asked for "single number": 42
     When asked for "opposite of word 'right'": left
+    When asked for "How many ...": eleven
+    When asked for "What says Yoda": "May the force be with you"
+
+    ## CONCRETE EXAMPLE:
+    When asked "The answer to the question of Universe, life and everything"
+    - WRONG ANSWER: The answer to the question is 42.
+    - RIGHT ANSWER: 42
 
     REMEMBER: Your ENTIRE response should be just the answer - nothing more, nothing less.
+
+    DO NOT EXPLAIN THE ANSWER. SIMPLY WRITE BACK THE ANSWER.
     """,
     llm=llm
 )
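The added prompt lines all push the writer to return the bare answer with no explanation. A small sketch of the intended behaviour, assuming the usual llama_index `ReActAgent.chat()` interface and the hypothetical config above:

```python
# Sketch of how the stricter writer prompt is meant to behave.
# Assumes the standard llama_index ReActAgent.chat() interface; the
# config dict is the same hypothetical one shown earlier.
from agents.llama_index_agent import create_writer_agent

writer = create_writer_agent({"model": "gpt-4o-mini"})

reply = writer.chat("What is the opposite of the word 'right'?")
print(str(reply))  # expected output: left  (just the answer, no explanation)
```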
app.py CHANGED
@@ -119,124 +119,16 @@ class BasicAgent:
         return final_answer
 
 
-class ResponseCache:
-    """Cache manager for storing and retrieving agent responses with persistence across HF rebuilds."""
-
-    def __init__(self, cache_name="agent_responses"):
-        """Initialize the cache manager.
-
-        Args:
-            cache_name: Base name for the cache file
-        """
-        # Use /data directory for persistence in HF Spaces
-        # Fall back to local directory if running locally
-        if os.path.exists("/data") and os.access("/data", os.W_OK):
-            self.cache_dir = Path("/data")
-            print("Using HF Spaces persistent storage in /data directory")
-        else:
-            self.cache_dir = Path(".")
-            print("Using local directory for cache (not persistent across HF rebuilds)")
-
-        # Ensure directory exists
-        os.makedirs(self.cache_dir, exist_ok=True)
-
-        # Full path to cache file
-        self.cache_file = self.cache_dir / f"{cache_name}.json"
-        print(f"Cache file location: {self.cache_file}")
-
-        # Load the cache
-        self.cache = self._load_cache()
-
-        # Stats for the current session
-        self.cache_hits = 0
-        self.cache_misses = 0
-
-    def _load_cache(self):
-        """Load the cache from disk."""
-        try:
-            if os.path.exists(self.cache_file):
-                with open(self.cache_file, 'r') as f:
-                    cache_data = json.load(f)
-                print(f"Cache loaded with {len(cache_data)} entries")
-                return cache_data
-            print("No existing cache found, starting with empty cache")
-            return {}
-        except Exception as e:
-            print(f"Error loading cache: {e}. Starting with empty cache.")
-            return {}
-
-    def _save_cache(self):
-        """Save the cache to disk."""
-        try:
-            with open(self.cache_file, 'w') as f:
-                json.dump(self.cache, f)
-            print(f"Cache saved with {len(self.cache)} entries")
-        except Exception as e:
-            print(f"Error saving cache: {e}")
-
-    def get_hash(self, question):
-        """Create a consistent hash for a question."""
-        return hashlib.md5(question.encode('utf-8')).hexdigest()
-
-    def get(self, question):
-        """Get a cached response if available.
-
-        Returns:
-            tuple: (cached_answer, hit_status)
-                - cached_answer: The cached answer or None if not found
-                - hit_status: True if cache hit, False if miss
-        """
-        question_hash = self.get_hash(question)
-        if question_hash in self.cache:
-            # Only return answers marked as correct
-            entry = self.cache[question_hash]
-            if entry.get("is_correct", False):
-                self.cache_hits += 1
-                return entry["answer"], True
-
-        self.cache_misses += 1
-        return None, False
-
-    def update(self, question, answer, is_correct=False):
-        """Update the cache with a new response.
-
-        Args:
-            question: The question text
-            answer: The agent's answer
-            is_correct: Whether the answer was correct
-        """
-        question_hash = self.get_hash(question)
-        self.cache[question_hash] = {
-            "question": question,
-            "answer": answer,
-            "is_correct": is_correct
-        }
-        self._save_cache()
-
-    def get_stats(self):
-        """Get cache statistics."""
-        total_entries = len(self.cache)
-        correct_entries = sum(1 for entry in self.cache.values() if entry.get("is_correct", False))
-
-        return {
-            "total_cached": total_entries,
-            "correct_cached": correct_entries,
-            "session_hits": self.cache_hits,
-            "session_misses": self.cache_misses
-        }
-
-
-def run_and_submit_all(profile: gr.OAuthProfile | None):
+def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results. Uses caching to avoid re-processing questions
-    with known correct answers.
+    and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username = f"{profile.username}"
+        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -245,17 +137,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-
-    # Initialize the cache
-    cache = ResponseCache()
-    print(f"Cache loaded. Stats: {cache.get_stats()}")
 
-    # 1. Instantiate Agent (only if needed)
-    agent = None # We'll lazily initialize the agent only if needed
-
-    # In the case of an app running as a hugging Face space, this link points toward your codebase
+    # 1. Instantiate Agent ( modify this part to create your agent)
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error instantiating agent: {e}")
+        return f"Error initializing agent: {e}", None
+    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-
+    print(agent_code)
+
     # 2. Fetch Questions
     print(f"Fetching questions from: {questions_url}")
     try:
@@ -277,144 +169,72 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
 
-    # 3. Run your Agent (with cache)
+    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    cache_usage = {"hits": 0, "misses": 0}
-
-    print(f"Processing {len(questions_data)} questions...")
-
+    print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
        question_text = item.get("question")
-
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
-
-        # Try to get the answer from cache
-        cached_answer, is_cache_hit = cache.get(question_text)
-
-        if is_cache_hit:
-            # Use cached answer
-            submitted_answer = cached_answer
-            cache_usage["hits"] += 1
-            print(f"✅ Cache hit for task {task_id}. Using cached answer.")
-        else:
-            # Cache miss - run the agent
-            cache_usage["misses"] += 1
-            print(f"🔄 Cache miss for task {task_id}. Running agent...")
-
-            # Lazy initialization of agent
-            if agent is None:
-                try:
-                    print("Initializing agent...")
-                    agent = BasicAgent()
-                except Exception as e:
-                    print(f"Error instantiating agent: {e}")
-                    return f"Error initializing agent: {e}", None
-
-            try:
-                submitted_answer = agent(question_text)
-            except Exception as e:
-                print(f"Error running agent on task {task_id}: {e}")
-                submitted_answer = f"AGENT ERROR: {e}"
-
-        # Add to results and submission payload
-        answers_payload.append({
-            "task_id": task_id,
-            "submitted_answer": submitted_answer
-        })
-
-        results_log.append({
-            "Task ID": task_id,
-            "Question": question_text,
-            "Submitted Answer": submitted_answer,
-            "From Cache": is_cache_hit
-        })
+        try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+        except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
     if not answers_payload:
-        print("No answers to submit.")
-        return "No answers to submit.", pd.DataFrame(results_log)
+        print("Agent did not produce any answers to submit.")
+        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
     # 4. Prepare Submission
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload
-    }
-
-    status_update = (
-        f"Finished processing questions. "
-        f"Cache: {cache_usage['hits']} hits, {cache_usage['misses']} misses. "
-        f"Submitting {len(answers_payload)} answers for user '{username}'..."
-    )
+    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
-    # 5. Submit and update cache with results
+    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
-
-        # Update cache with correct answers
-        if "task_results" in result_data:
-            cache_updates = 0
-            for task_result in result_data["task_results"]:
-                task_id = task_result.get("task_id")
-                is_correct = task_result.get("is_correct", False)
-
-                # Find the matching question and answer
-                for item in questions_data:
-                    if item.get("task_id") == task_id:
-                        question = item.get("question")
-
-                        # Find the matching submitted answer
-                        for answer_item in answers_payload:
-                            if answer_item.get("task_id") == task_id:
-                                answer = answer_item.get("submitted_answer")
-
-                                # Only cache correct answers
-                                if is_correct:
-                                    cache.update(question, answer, is_correct=True)
-                                    cache_updates += 1
-                                break
-
-            print(f"Updated cache with {cache_updates} correct answers.")
-
-        # Prepare final status message
-        cache_stats = cache.get_stats()
         final_status = (
             f"Submission Successful!\n"
             f"User: {result_data.get('username')}\n"
             f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Cache Performance: {cache_usage['hits']} hits, {cache_usage['misses']} misses\n"
-            f"Total Cached Correct Answers: {cache_stats['correct_cached']}\n"
             f"Message: {result_data.get('message', 'No message received.')}"
         )
-
-        # Add cache information to results dataframe
+        print("Submission successful.")
         results_df = pd.DataFrame(results_log)
-
-        # If the response includes detailed results, add correctness to the DataFrame
-        if "task_results" in result_data:
-            # Create a mapping of task_id to correctness
-            correctness_map = {
-                result["task_id"]: result["is_correct"]
-                for result in result_data["task_results"]
-            }
-
-            # Add a column for correctness
-            results_df["Is Correct"] = results_df["Task ID"].map(
-                lambda x: correctness_map.get(x, "Unknown")
-            )
-
         return final_status, results_df
-
+    except requests.exceptions.HTTPError as e:
+        error_detail = f"Server responded with status {e.response.status_code}."
+        try:
+            error_json = e.response.json()
+            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+        except requests.exceptions.JSONDecodeError:
+            error_detail += f" Response: {e.response.text[:500]}"
+        status_message = f"Submission Failed: {error_detail}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.Timeout:
+        status_message = "Submission Failed: The request timed out."
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
+    except requests.exceptions.RequestException as e:
+        status_message = f"Submission Failed: Network error - {e}"
+        print(status_message)
+        results_df = pd.DataFrame(results_log)
+        return status_message, results_df
     except Exception as e:
-        status_message = f"Submission Failed: {str(e)}"
+        status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
@@ -422,21 +242,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner (with Caching)")
+    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-
-        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc...
-        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
-        **Caching Enabled**: Correct answers are cached between runs to speed up evaluation.
-
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time (this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
+        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
 
@@ -445,24 +261,14 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    # Display current cache status
-    cache = ResponseCache()
-    cache_stats = cache.get_stats()
-
-    gr.Markdown(
-        f"**Cache Status**: {cache_stats['correct_cached']} correct answers cached out of {cache_stats['total_cached']} total entries."
-    )
-
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
    )
 
-
-
-# Add these imports to your existing imports
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
@@ -484,12 +290,5 @@ if __name__ == "__main__":
 
     print("-"*(60 + len(" App Starting ")) + "\n")
 
-    # Check cache persistence
-    cache = ResponseCache()
-    stats = cache.get_stats()
-    print(f"Cache loaded with {stats['correct_cached']} correct answers out of {stats['total_cached']} total entries")
-
-
     print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)
-
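The disclaimer above suggests caching the answers or answering the questions asynchronously rather than blocking the submit button. A minimal sketch of the async variant, assuming `BasicAgent` stays a synchronous callable as in app.py; everything except `BasicAgent` and the payload keys is illustrative only.

```python
# Hedged sketch of the "answer the questions in async" idea from the
# disclaimer: run the blocking agent in worker threads instead of a
# sequential loop. Names other than BasicAgent and the payload keys
# are illustrative, not part of this commit.
import asyncio

async def answer_all(agent, questions_data):
    """Answer every question concurrently using worker threads."""
    async def answer_one(item):
        task_id = item.get("task_id")
        question = item.get("question")
        # agent(question) is blocking, so push it off the event loop
        answer = await asyncio.to_thread(agent, question)
        return {"task_id": task_id, "submitted_answer": answer}

    return await asyncio.gather(*(answer_one(item) for item in questions_data))

# Inside run_and_submit_all, for example:
# answers_payload = asyncio.run(answer_all(agent, questions_data))
```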