Work In Progress
Files changed:
- .gitignore (+1, -0)
- app.py (+56, -52)
- test_agents,py (+0, -42)
- test_agents.py (+193, -0)
.gitignore CHANGED
@@ -1,2 +1,3 @@
 .env
 __pycache__/
+logs/
app.py CHANGED
@@ -1,3 +1,4 @@
+import textwrap
 import re
 import os
 import gradio as gr
@@ -9,15 +10,10 @@ import pandas as pd
 from dotenv import load_dotenv
 
 # Import smolagents components
-from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool
-
-
+from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, FinalAnswerTool
 
 # Load environment variables from .env file
 load_dotenv()
-print(os.getenv("HF_TOKEN"))
-
-
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -33,26 +29,33 @@ search_tool = DuckDuckGoSearchTool()
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
+
+        self.store_questions_to_log_file = False
+
         # Create a filename with current date and time
         current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")
         self.filename = f"questions_{current_time}.txt"
-        print(f"Questions will be written to {self.filename}")
 
-
-
-
+        if self.store_questions_to_log_file:
+            print(f"Questions will be written to {self.filename}")
+            # Clear the file if it exists or create a new one
+            with open(self.filename, 'w', encoding='utf-8') as f:
+                f.write('')  # Create empty file
 
 
         # Initialize the Large Language Model
         # The model is used by both agents in this simple setup
         self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+        # mistralai/Mixtral-8x7B-Instruct-v0.1
+        #self.model = HfApiModel(model_id="mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
         # Define the Web Search Agent
         # This agent is specialised for searching the web using a specific tool
         self.web_search_agent = CodeAgent(
             model=self.model,  # Assign the model to the agent [
-            tools=[DuckDuckGoSearchTool()],
+            tools=[DuckDuckGoSearchTool(),
+                   FinalAnswerTool()],  # Provide the web search tool
             name="web_search_agent",  # Give the agent a name
             # Describe its capability [
             description="Searches the web for information.",
@@ -64,7 +67,7 @@ class BasicAgent:
         # This agent manages tasks and delegates to other agents
         self.manager_agent = CodeAgent(
             model=self.model,  # Assign the model to the manager
-            tools=[],
+            tools=[FinalAnswerTool()],
             managed_agents=[self.web_search_agent],  # Specify the agents this manager oversees
             name="manager_agent",  # Give the manager agent a name
             description="Manages tasks by delegating to other agents.",  # Describe its role
@@ -82,63 +85,64 @@ class BasicAgent:
 
 
         # For all other questions, use the manager agent with web search
-        manager_prompt = dedent(f"""
-        I need to answer the following question accurately:
+        # manager_prompt = dedent(f"""
+        # I need to answer the following question accurately:
+
+        # {question}
+
+        # Please analyze this question and determine the best approach to answer it.
+        # If needed, use web search to find relevant information.
+        # Provide a concise, accurate answer to the question.
+        # """)
+
+        manager_prompt = textwrap.dedent(f"""
+        I need to answer the following question accurately:
 
-        {question}
+        {question}
 
-        Please analyze this question and determine the best approach to answer it.
-        If needed, use web search to find relevant information.
-        Provide a concise, accurate answer to the question.
+        Please analyze this question and determine the best approach to answer it.
+        If needed, use web search to find relevant information.
+        Provide a concise, accurate answer to the question.
+
+        IMPORTANT: If you identify that specialized tools are needed that you don't have access to, respond with:
+        "Missing Tool Warning: Can't process the question. Missing tool for [specify the missing capability]."
+
+        Examples of missing capabilities to check for:
+        - YouTube video analysis (if question mentions YouTube videos)
+        - Image analysis (if question refers to analyzing images)
+        - Audio file processing (if question refers to audio files)
+        - Excel/spreadsheet analysis (if question refers to Excel files)
+        - Chess position analysis (if question refers to chess positions)
+        - Code execution (if question requires running Python code)
+
+        Only use the "Missing Tool Warning" format if you CANNOT answer the question with your available tools.
+        If you can answer the question with web search or your existing knowledge, provide the answer.
         """)
 
         manager_agent_response = "I apologize, but I couldn't find an answer to this question."
-        answer = ""
         source = ""
         try:
             manager_agent_response = self.manager_agent.run(manager_prompt)
             source = "manager_agent"
-
-            #
-            # if
-            #
-
-            # answer_match = re.search(answer_pattern, manager_agent_response, re.IGNORECASE)
-
-            # if answer_match:
-            #     answer = answer_match.group(1).strip()
-            # else:
-            #     # Get the last paragraph
-            #     paragraphs = [p for p in answer.split('\n') if p.strip()]
-            #     if paragraphs:
-            #         answer = paragraphs[-1].strip()
-            #         source = "long_answer"
-
-            #return answer
-
+
+            # Check if the answer contains a missing tool warning
+            # if "Missing Tool Warning:" in manager_agent_response:
+            #     return manager_agent_response
+
         except Exception as e:
             print(f"Error in manager agent: {e}")
             source = f"Exception {e} "
 
-        # # Fall back to direct web search
-        # try:
-        #     answer = self.web_search_agent.run(f"Please find accurate information to answer: {question}")
-        #     source = "web_search_agent"
-
-        # except Exception as e2:
-        #     print(f"Error in web agent: {e2}")
-        #     answer="I apologize, but I couldn't find an answer to this question."
-
-
        # Append the question to the file
-
-
-
-
+        if self.store_questions_to_log_file:
+            with open(self.filename, 'a', encoding='utf-8') as f:
+                f.write(f"{question}\n")
+                f.write(f"ANSWER by {source}: {manager_agent_response}\n")
+                f.write(f"{'*'*50}\n")
 
         print(f"Final answer: {manager_agent_response}")
         return manager_agent_response
-
+
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
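For orientation, the agent hierarchy that app.py now builds can be condensed into the short, self-contained sketch below. It mirrors the wiring added in this commit (model ID, tool lists, agent names and the sample question are taken from the diff), but it is only an illustrative sketch assuming smolagents is installed and HF_TOKEN is set; the real BasicAgent also handles logging and the full "Missing Tool Warning" prompt shown above.

import textwrap
from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, FinalAnswerTool

# Shared model used by both agents
model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")

# Worker agent: performs web searches and reports back via FinalAnswerTool
web_search_agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), FinalAnswerTool()],
    name="web_search_agent",
    description="Searches the web for information.",
)

# Manager agent: holds only FinalAnswerTool itself and delegates research
# to the managed web_search_agent
manager_agent = CodeAgent(
    model=model,
    tools=[FinalAnswerTool()],
    managed_agents=[web_search_agent],
    name="manager_agent",
    description="Manages tasks by delegating to other agents.",
)

# Illustrative question; the real prompt adds the "Missing Tool Warning" instructions
question = "What is the capital of France?"
prompt = textwrap.dedent(f"""
    I need to answer the following question accurately:

    {question}
""")
print(manager_agent.run(prompt))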
test_agents,py DELETED
@@ -1,42 +0,0 @@
-# test_agents,py
-import os
-from dotenv import load_dotenv
-
-# Import the BasicAgent from your app module
-try:
-    from app import BasicAgent
-except ImportError as e:
-    print(f"Error importing BasicAgent from app.py: {e}")
-    print("Please ensure app.py is in the same directory or accessible in the Python path.")
-    exit(1)
-
-# --- Define Question-Answer Pairs ---
-# Note: The 'A' part is just for reference here; the agent will generate its own answer.
-QA_PAIRS = {
-    "What is the capital of France?": "Paris",
-    "Who wrote 'Hamlet'?": "William Shakespeare",
-    "What is the formula for water?": "H2O",
-    "How does photosynthesis work?": "Plants use sunlight, water, and carbon dioxide to create their own food.",
-    # Agent should find current data
-    "What is the current population of Earth?": "Approximately 8 billion",
-}
-
-
-def run_test_questions():
-    """Instantiates the agent and runs it on the predefined questions."""
-    print("--- Starting Agent Test ---")
-    # Load environment variables (needed for BasicAgent initialization)
-    load_dotenv()
-    print(f"HF_TOKEN found: {'Yes' if os.getenv('HF_TOKEN') else 'No'}")
-
-    agent = BasicAgent()
-
-    for question in QA_PAIRS.keys():
-        print(f"\n--- Testing Question ---")
-        print(f"Q: {question}")
-        answer = agent(question)  # Call the agent instance
-        print(f"Agent A: {answer}")
-
-
-if __name__ == "__main__":
-    run_test_questions()
test_agents.py ADDED
@@ -0,0 +1,193 @@
+# test_agents,py
+import json
+import time
+import datetime
+import os
+from dotenv import load_dotenv
+from app import BasicAgent  # Assuming app.py is accessible
+
+# Import the BasicAgent from your app module
+try:
+    from app import BasicAgent
+except ImportError as e:
+    print(f"Error importing BasicAgent from app.py: {e}")
+    print("Please ensure app.py is in the same directory or accessible in the Python path.")
+    exit(1)
+
+# --- Define Question-Answer Pairs ---
+# Note: The 'A' part is just for reference here; the agent will generate its own answer.
+QA_PAIRS = {
+    "What is the capital of France?": "Paris",
+    "Who wrote 'Hamlet'?": "William Shakespeare",
+    "What is the formula for water?": "H2O",
+    "How does photosynthesis work?": "Plants use sunlight, water, and carbon dioxide to create their own food.",
+    # Agent should find current data
+    "What is the current population of Earth?": "Approximately 8 billion",
+}
+
+
+def eval_GAIA(json_file_path="GAIA_level1_status.json"):
+    """
+    Loads GAIA level 1 questions from a JSON file, evaluates unanswered
+    questions using the BasicAgent, logs incorrect answers, updates the
+    status in the JSON data, and saves the updated data.
+
+    Args:
+        json_file_path (str): The path to the GAIA status JSON file.
+            Defaults to "GAIA_level1_status.json".
+    """
+    print(f"--- Starting GAIA Evaluation from {json_file_path} ---")
+
+    tmp_json_file_path = json_file_path.replace(".json", "_tmp.json")
+
+    # 2. Load GAIA data
+    try:
+        with open(json_file_path, 'r', encoding='utf-8') as f:
+            gaia_data = json.load(f)
+        print(
+            f"Successfully loaded {len(gaia_data)} questions from {json_file_path}.")
+    except FileNotFoundError:
+        print(f"Error: JSON file not found at {json_file_path}")
+        return
+    except json.JSONDecodeError:
+        print(f"Error: Could not decode JSON from {json_file_path}")
+        return
+    except Exception as e:
+        print(f"An unexpected error occurred loading the JSON file: {e}")
+        return
+
+    # 3. Initialize Agent, Log file, and Tracking variables
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        print(f"Error initializing BasicAgent: {e}")
+        print("Evaluation cannot proceed.")
+        return
+
+    log_filename = f"Response_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}.log"
+    print(f"Incorrect answers will be logged to: {log_filename}")
+
+    total_questions = len(gaia_data)
+    processed_count = 0
+    correct_count = 0
+    initially_correct = sum(1 for item in gaia_data.values()
+                            if item.get("status") is True)
+    questions_to_process = total_questions - initially_correct
+
+    print(f"Found {initially_correct} questions already marked as correct.")
+    if questions_to_process == 0:
+        print("No questions with status=false found to process.")
+        # Still save the file in case formatting needs update, or just return
+        # For consistency, let's save it.
+    else:
+        print(f"Attempting to answer {questions_to_process} questions...")
+
+    start_time = time.time()
+
+    # 4. Process questions
+    for q_num, data in gaia_data.items():
+        if data.get("status") is False:
+            processed_count += 1
+            question = data.get("Q")
+            correct_answer = data.get("A")
+            status = data.get("status")  # Should be False here
+
+            if question is None or correct_answer is None:
+                print(f"Skipping question {q_num}: Missing 'Q' or 'A'.")
+                continue
+
+            elapsed_time = time.time() - start_time
+            print(
+                f"\nProcessing question {processed_count}/{questions_to_process} (ID: {q_num}) | Elapsed: {elapsed_time:.2f}s")
+            print(f"Q: {question[:100]}...")  # Print first 100 chars
+
+            try:
+                agent_response = agent(question)
+                print(f"Agent A: {agent_response}")
+                print(f"Correct A: {correct_answer}")
+
+                # Simple comparison (case-sensitive, exact match)
+                # Consider adding .strip() or lower() for more robust comparison if needed
+                if str(agent_response).strip() == str(correct_answer).strip():
+                    print(f"Result for Q {q_num}: CORRECT")
+                    gaia_data[q_num]["status"] = True
+                    correct_count += 1
+                else:
+                    print(f"Result for Q {q_num}: INCORRECT")
+                    # Append to log file
+                    with open(log_filename, 'a', encoding='utf-8') as log_f:
+                        log_f.write(f"*question number {q_num} *\n")
+                        log_f.write(f"Q: {question}\n")
+                        log_f.write(f"A: {correct_answer}\n")
+                        log_f.write(f"Agent: {agent_response}\n")
+                        log_f.write("<END>\n\n")
+
+            except Exception as e:
+                print(f"Error processing question {q_num} with agent: {e}")
+                # Optionally log agent errors too
+                with open(log_filename, 'a', encoding='utf-8') as log_f:
+                    log_f.write(f"*question number {q_num} *\n")
+                    log_f.write(f"Q: {question}\n")
+                    log_f.write(f"A: {correct_answer}\n")
+                    log_f.write(f"Agent: ERROR - {e}\n")
+                    log_f.write("<END>\n\n")
+
+            with open(tmp_json_file_path, 'w', encoding='utf-8') as f:
+                json.dump(gaia_data, f, indent=4, ensure_ascii=False)
+
+        else:
+            correct_count += 1
+            print(f"Skipping question {q_num}: Status is already True.")
+
+        # Exit
+        #break
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    # 5. Summary
+    print("\n--- Evaluation Summary ---")
+    print(f"Processed {processed_count} questions with status=false.")
+    print(f"Correct answers provided by agent: {correct_count}")
+    final_correct_count = initially_correct + correct_count
+    print(
+        f"Total correct answers (initial + agent): {final_correct_count}/{total_questions}")
+    print(f"Total evaluation time: {total_time:.2f} seconds")
+
+    # 6. Save updated data
+    try:
+        with open(json_file_path, 'w', encoding='utf-8') as f:
+            json.dump(gaia_data, f, indent=4, ensure_ascii=False)
+        print(f"Successfully saved updated data to {json_file_path}")
+    except Exception as e:
+        print(f"Error saving updated data to {json_file_path}: {e}")
+
+    print("--- GAIA Evaluation Finished ---")
+
+
+def run_test_questions():
+    """Instantiates the agent and runs it on the predefined questions."""
+    print("--- Starting Agent Test ---")
+    # Load environment variables (needed for BasicAgent initialization)
+    #load_dotenv()
+    #print(f"HF_TOKEN found: {'Yes' if os.getenv('HF_TOKEN') else 'No'}")
+
+    agent = BasicAgent()
+
+    for question in QA_PAIRS.keys():
+        print(f"\n--- Testing Question ---")
+        print(f"Q: {question}")
+        answer = agent(question)  # Call the agent instance
+        print(f"Agent A: {answer}")
+
+
+if __name__ == "__main__":
+
+    load_dotenv()
+    if not os.getenv('HF_TOKEN'):
+        print("Warning: HF_TOKEN environment variable not found. Agent might fail.")
+
+    # run_test_questions()
+
+    #
+    eval_GAIA()
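The eval_GAIA() loop above reads and rewrites a GAIA_level1_status.json file keyed by question ID, where each entry carries the question text ("Q"), the reference answer ("A"), and a boolean "status" flag marking whether the agent has already answered it correctly. A minimal sketch of a compatible file, using placeholder questions borrowed from QA_PAIRS rather than real GAIA level 1 items, could be generated like this:

import json

# Hypothetical placeholder entries; real GAIA level 1 questions/answers would go here.
# Keys are question IDs; "status" marks whether the agent has already answered correctly.
example_status = {
    "1": {"Q": "What is the capital of France?", "A": "Paris", "status": False},
    "2": {"Q": "Who wrote 'Hamlet'?", "A": "William Shakespeare", "status": False},
}

with open("GAIA_level1_status.json", "w", encoding="utf-8") as f:
    json.dump(example_status, f, indent=4, ensure_ascii=False)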