agents_course_final_assignement

Paused

App Files Files Community

drAbreu committed 25 days ago

Commit

227dcb0

1 Parent(s): 56a4634

Review agent added

Browse files

Files changed (3) hide show

agents/llama_index_agent.py +114 -42
app.py +35 -12
tools/multimedia_tools.py +1 -1

agents/llama_index_agent.py CHANGED Viewed

@@ -197,7 +197,6 @@ class GaiaAgent(ReActAgent):
         query: What is the first name of the scientist who discovered penicillin?
         research_notes: After researching, I found that Sir Alexander Fleming discovered penicillin in 1928. The full answer is "Alexander Fleming" but the question only asks for the first name, which is "Alexander".
-        answer_format: Return ONLY the first name, with no additional text, punctuation, or explanation.
         ```
         IMPORTANT: NEVER provide the final answer directly to the user. ALWAYS hand off to the writer_agent for proper formatting.
@@ -222,21 +221,20 @@ def create_writer_agent(model_config: Dict[str, Any]) -> ReActAgent:
         llm = OpenAI(
             model=model_name,
             api_key=api_key or os.getenv("OPENAI_API_KEY"),
-            max_tokens=128,
             temperature=0.1,
             additional_kwargs={
-                "max_tokens": 128,
-                "temperature": 0.5}
-            )
     elif model_provider.lower() == "anthropic":
         llm = Anthropic(
             model=model_name,
             api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
-            temperature=1.0 if "3-7" in model_name else 0.5,
-            thinking_dict={"type": "enabled", "budget_tokens": 5112} if "3-7" in model_name else None,
-            max_tokens=2048*4,
         )
     else:
         raise ValueError(f"Unsupported model provider for writer agent: {model_provider}")
@@ -244,50 +242,124 @@ def create_writer_agent(model_config: Dict[str, Any]) -> ReActAgent:
     # Create and return the writer agent
     return ReActAgent(
         name="writer_agent",
-        description="Formats the final answer exactly as specified for GAIA benchmark questions",
         system_prompt="""
-        You are a specialized formatting agent for the GAIA benchmark. Your ONLY job is to take the research from the main agent and format the answer EXACTLY as required by the benchmark question.
         ## YOUR ROLE
         You will receive:
         - query: The original question
         - research_notes: The main agent's complete analysis and reasoning
-        - answer_format: Specific formatting instructions for the final answer
-        ## CRITICAL RULES
-        1. Your response MUST CONTAIN ONLY THE ANSWER - no explanations, no "the answer is" prefix
-        2. Follow the answer_format instructions precisely
-        3. Remove ALL unnecessary characters, spaces, punctuation, or wording
-        4. If asked for a name, provide ONLY the name
-        5. If asked for a number, provide ONLY the number
-        6. If asked for a list, format it EXACTLY as specified (comma-separated, alphabetical, etc.)
-        7. NEVER include your own thoughts or analysis
-        8. NEVER add preamble or conclusion text
-        ## EXAMPLES OF CORRECT RESPONSES:
-        When asked for "first name only": Alexander
-        When asked for "comma-separated list in alphabetical order": apple, banana, cherry
-        When asked for "single number": 42
-        When asked for "opposite of word 'right'": left
-        When asked for "How many ...": eleven
-        When asked for "What says Yoda": "May the force be with you"
-        ## CONCRETE EXAMPLE:
-        When asked "The answer to the question of Universe, life and everything"
-            - WRONG ANSWER: The answer to the question is 42.
-            - RIGHT ANSWER: 42
-        - For question `How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.`:
-            - WRONG ANSWER : `She released three studio albums in that period – Misa Criolla (2000), Corazón Libre (2005) and Cantora (2009).`
-            - RIGHT ANSWER: `Three`
-        - For question `"Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec. What does Teal'c say in response to the question "Isn't that hot?"`:
-            - WRONG ANSWER: `"He replies, “Extremely.”"`
-            - RIGHT ANSWER: `Extremely`
-        REMEMBER: Your ENTIRE response should be just the answer - nothing more, nothing less.
-        DO NOT EXPLAIN THE ANSWER. SIMPLY WRITE BACK THE ANSWER.
         """,
         llm=llm
     )

         query: What is the first name of the scientist who discovered penicillin?
         research_notes: After researching, I found that Sir Alexander Fleming discovered penicillin in 1928. The full answer is "Alexander Fleming" but the question only asks for the first name, which is "Alexander".
         ```
         IMPORTANT: NEVER provide the final answer directly to the user. ALWAYS hand off to the writer_agent for proper formatting.
         llm = OpenAI(
             model=model_name,
             api_key=api_key or os.getenv("OPENAI_API_KEY"),
+            max_tokens=256,
             temperature=0.1,
             additional_kwargs={
+                "max_tokens": 256,
+                "temperature": 0.1
+            }
+        )
     elif model_provider.lower() == "anthropic":
         llm = Anthropic(
             model=model_name,
             api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
+            temperature=0.1,
+            thinking_dict={"type": "enabled", "budget_tokens": 1024} if "3-7" in model_name else None,
+            max_tokens=1024
         )
     else:
         raise ValueError(f"Unsupported model provider for writer agent: {model_provider}")
     # Create and return the writer agent
     return ReActAgent(
         name="writer_agent",
+        description="Formats the final answer based on research notes for GAIA benchmark questions",
         system_prompt="""
+        You are a specialized formatting agent for the GAIA benchmark. Your job is to take the research from the main agent and format the answer according to the benchmark requirements.
         ## YOUR ROLE
         You will receive:
         - query: The original question
         - research_notes: The main agent's complete analysis and reasoning
+        ## FORMATTING RULES
+        1. Format the answer according to the instructions in the `query` received
+        2. Your answers will be always as minimal as necessary to answer the question
+        2. Try to remove unnecessary characters, spaces, or wording
+        3. If asked for a name, provide **ONLY** the name
+        4. If asked for a number, provide the **ONLY** number
+        5. If asked for a list, format it exactly as specified
+        ## DELEGATION TO REVIEW AGENT
+        After formatting your answer, ALWAYS delegate to the review_agent with:
+        - query: The original question
+        - formatted_answer: Your formatted answer
+        Example handoff to review_agent:
+        ```
+        I'll delegate to review_agent for final review.
+        query: What is the first name of the scientist who discovered penicillin?
+        formatted_answer: Alexander
+        format_requirements: Return ONLY the first name, with no additional text.
+        ```
+        IMPORTANT: ALWAYS hand off to the review_agent for final verification and cleanup.
+        """,
+        llm=llm,
+        can_handoff_to=["review_agent"]
+    )
+def create_review_agent(model_config: Dict[str, Any]) -> ReActAgent:
+    """
+    Create a review agent that ensures the final answer follows exact formatting requirements.
+    The agent receives the original query and the writer agent's formatted answer and
+    is prompted to reply with nothing but the bare final answer.
+    Args:
+        model_config: Dictionary containing model_provider, model_name, and api_key.
+            Missing keys default to provider "openai" and model "gpt-4o-mini"; a missing
+            api_key falls back to the provider's environment variable.
+    Returns:
+        A configured ReActAgent for final answer review and formatting
+    Raises:
+        ValueError: If model_provider is neither "openai" nor "anthropic"
+    """
+    # Initialize LLM based on the provided configuration
+    model_provider = model_config.get("model_provider", "openai")
+    model_name = model_config.get("model_name", "gpt-4o-mini")
+    api_key = model_config.get("api_key")
+    provider = model_provider.lower()
+    if provider == "openai":
+        llm = OpenAI(
+            model=model_name,
+            api_key=api_key or os.getenv("OPENAI_API_KEY"),
+            max_tokens=128,
+            temperature=0.0,  # Use 0 temperature for deterministic output
+            additional_kwargs={
+                "max_tokens": 128,
+                "temperature": 0.0
+            }
+        )
+    elif provider == "anthropic":
+        # Extended thinking is only switched on for model names containing "3-7".
+        thinking_enabled = "3-7" in model_name
+        thinking_budget = 1024
+        answer_tokens = 128  # Keep token limit low for final answers
+        llm = Anthropic(
+            model=model_name,
+            api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
+            # Anthropic does not accept a custom temperature while thinking is enabled,
+            # so the deterministic 0 temperature is only used when thinking is off.
+            temperature=1.0 if thinking_enabled else 0.0,
+            thinking_dict={"type": "enabled", "budget_tokens": thinking_budget} if thinking_enabled else None,
+            # max_tokens must be larger than budget_tokens, otherwise the request is
+            # rejected; reserve the thinking budget on top of the short answer allowance.
+            max_tokens=thinking_budget + answer_tokens if thinking_enabled else answer_tokens
+        )
+    else:
+        raise ValueError(f"Unsupported model provider for review agent: {model_provider}")
+    # Create and return the review agent
+    return ReActAgent(
+        name="review_agent",
+        description="Ensures the final answer is formatted exactly as required, removing any unnecessary information",
+        system_prompt="""
+        You are the final review agent for the GAIA benchmark. Your ONLY job is to ensure the answer is in the EXACT format required. This is EXTREMELY important for benchmark scoring.
+        ## YOUR ROLE
+        You will receive:
+        - query: The original question
+        - formatted_answer: The answer formatted by the writer agent
+        ## CRITICAL RULES
+        1. Your ENTIRE response must be ONLY the final answer - NOTHING ELSE
+        2. Remove ALL of the following:
+           - Explanations like "The answer is..." or "I found that..."
+           - Quotation marks (unless explicitly required)
+           - Punctuation at the end (unless explicitly required)
+           - Unnecessary whitespace
+        3. If no specific format is mentioned, make the answer as minimal as possible:
+           - For names/words: just the name/word (e.g., "Paris")
+           - For numbers: just the number (e.g., "42")
+           - For lists: comma-separated values (e.g., "apple, banana, cherry")
+        4. NEVER add ANY commentary, explanation, or additional information
+        5. Double-check for exact formatting requirements like:
+           - Numerical format (e.g., "42" vs "forty-two")
+           - Case sensitivity (e.g., "PARIS" vs "Paris")
+           - List formatting (e.g., comma-separated vs numbered)
+        ## OUTPUT EXAMPLES
+        - Input: "The answer is Alexander."
+          Output: Alexander
+        - Input: "The result is 42 because..."
+          Output: 42
+        - Input: "The capital of France is Paris."
+          Output: Paris
+        - Input: "I found that it's eleven."
+          Output: eleven
+        - Input: "These actors starred in the film: Tom Hanks, Meg Ryan, and Bill Pullman."
+          Output: Tom Hanks, Meg Ryan, Bill Pullman
+        - Input: "She published studio albums "Album 1", "Album 2", "Album 3", so in total 3."
+          Output: 3
+        REMEMBER: Your ENTIRE response should be just the bare answer with NOTHING else.
         """,
         llm=llm
     )

app.py CHANGED Viewed

@@ -28,17 +28,18 @@ OPENAI = {
 class BasicAgent:
     def __init__(
             self,
-            # model_provider="anthropic",
-            # model_name="claude-3-7-sonnet-latest",
-            model_provider="openai",
-            model_name="o4-mini",
             api_key=None,
             use_separate_writer_model=True,
             writer_model_provider="openai",
-            writer_model_name="gpt-4o-mini"
             ):
         """
-        Initialize the BasicAgent with a multi-agent workflow.
         Args:
             model_provider: LLM provider for main agent
@@ -47,6 +48,9 @@ class BasicAgent:
             use_separate_writer_model: Whether to use a different model for the writer agent
             writer_model_provider: LLM provider for writer agent (if separate)
             writer_model_name: Model name for writer agent (if separate)
         """
         # Configure the main reasoning agent
         main_model_config = {
@@ -64,22 +68,37 @@ class BasicAgent:
             }
         else:
             writer_model_config = main_model_config
-        # Create the main agent
         self.main_agent = GaiaAgent(**main_model_config)
-        # Create the writer agent
         self.writer_agent = create_writer_agent(writer_model_config)
         # Set up the agent workflow with shared context
         self.agent_workflow = AgentWorkflow(
-            agents=[self.main_agent, self.writer_agent],
             root_agent=self.main_agent.name,
             initial_state={
                 "original_question": "",
                 "analysis_notes": "",
                 "format_requirements": "",
                 "next_agent": "",
                 "final_answer": ""
             }
         )
@@ -89,7 +108,11 @@ class BasicAgent:
             print(f"Writer agent using: {writer_model_provider} {writer_model_name}")
         else:
             print(f"Writer agent using same model as main agent")
     def __call__(self, question_data: dict) -> str:
         """Process a GAIA benchmark question and return the formatted answer."""
         # Extract question text and task_id
@@ -141,7 +164,7 @@ class BasicAgent:
         # Extract the final answer from the writer agent's response
         final_answer = response.response.blocks[-1].text
-        print(f"Agent returning answer: {final_answer}")
         return final_answer
     def download_task_file(self, question_data: dict) -> str:

 class BasicAgent:
     def __init__(
             self,
+            model_provider="anthropic",
+            model_name="claude-3-7-sonnet-latest",
             api_key=None,
             use_separate_writer_model=True,
             writer_model_provider="openai",
+            writer_model_name="gpt-4o-mini",
+            use_separate_review_model=True,
+            review_model_provider="openai",
+            review_model_name="gpt-4o-mini"
             ):
         """
+        Initialize the BasicAgent with a three-agent workflow.
         Args:
             model_provider: LLM provider for main agent
             use_separate_writer_model: Whether to use a different model for the writer agent
             writer_model_provider: LLM provider for writer agent (if separate)
             writer_model_name: Model name for writer agent (if separate)
+            use_separate_review_model: Whether to use a different model for the review agent
+            review_model_provider: LLM provider for review agent (if separate)
+            review_model_name: Model name for review agent (if separate)
         """
         # Configure the main reasoning agent
         main_model_config = {
             }
         else:
             writer_model_config = main_model_config
+        # Configure the review agent (either same as main or different)
+        if use_separate_review_model:
+            review_model_config = {
+                "model_provider": review_model_provider,
+                "model_name": review_model_name,
+                "api_key": api_key  # Use same API key for simplicity
+            }
+        else:
+            review_model_config = main_model_config
+        # Create the agents
         self.main_agent = GaiaAgent(**main_model_config)
         self.writer_agent = create_writer_agent(writer_model_config)
+        self.review_agent = create_review_agent(review_model_config)
+        # Update the GaiaAgent's can_handoff_to to include review_agent
+        self.main_agent.can_handoff_to = ["writer_agent", "review_agent"]
         # Set up the agent workflow with shared context
         self.agent_workflow = AgentWorkflow(
+            agents=[self.main_agent, self.writer_agent, self.review_agent],
             root_agent=self.main_agent.name,
             initial_state={
                 "original_question": "",
+                "task_id": "",
+                "audio_file_path": "",
                 "analysis_notes": "",
                 "format_requirements": "",
                 "next_agent": "",
+                "formatted_answer": "",
                 "final_answer": ""
             }
         )
             print(f"Writer agent using: {writer_model_provider} {writer_model_name}")
         else:
             print(f"Writer agent using same model as main agent")
+        if use_separate_review_model:
+            print(f"Review agent using: {review_model_provider} {review_model_name}")
+        else:
+            print(f"Review agent using same model as main agent")
     def __call__(self, question_data: dict) -> str:
         """Process a GAIA benchmark question and return the formatted answer."""
         # Extract question text and task_id
         # Extract the final answer from the writer agent's response
         final_answer = response.response.blocks[-1].text
+        print(f"Agent returning final answer: {final_answer}")
         return final_answer
     def download_task_file(self, question_data: dict) -> str:

tools/multimedia_tools.py CHANGED Viewed

@@ -195,7 +195,7 @@ class VisionAnalyzerAgent:
             try:
                 response = self.client.chat.completions.create(
                     model=self.model_name,
-                    max_tokens=1024,
                     messages=[
                         {
                             "role": "user",

             try:
                 response = self.client.chat.completions.create(
                     model=self.model_name,
+                    max_tokens=1024*20,
                     messages=[
                         {
                             "role": "user",