Final_Assignment_Template

Build error

File size: 9,989 Bytes

import logging

import pytest
import requests
from langgraph.types import Command

from agent import AgentRunner

# Dedicated logger for this test module; records at INFO level and above pass
# the logger's own level check.
_TEST_LOGGER_NAME = "test_agent"
test_logger = logging.getLogger(_TEST_LOGGER_NAME)
test_logger.setLevel(logging.INFO)

# Module-wide pytest mark: ignore DeprecationWarning originating from the
# httpx._models module.
_IGNORED_WARNING_SPEC = "ignore::DeprecationWarning:httpx._models"
pytestmark = pytest.mark.filterwarnings(_IGNORED_WARNING_SPEC)

# Base URL of the remote service and the questions endpoint derived from it
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = DEFAULT_API_URL + "/questions"


@pytest.fixture(scope="session")
def agent():
    """Session-scoped fixture that builds an AgentRunner and hands it to tests."""
    test_logger.info("Creating AgentRunner instance")
    runner = AgentRunner()
    return runner


# @pytest.fixture(scope="session")
# def questions_data():
#     """Fixture to fetch questions from the API."""
#     test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
#     try:
#         response = requests.get(QUESTIONS_URL, timeout=15)
#         response.raise_for_status()
#         data = response.json()
#         if not data:
#             test_logger.error("Fetched questions list is empty.")
#             return []
#         test_logger.info(f"Fetched {len(data)} questions.")
#         return data
#     except requests.exceptions.RequestException as e:
#         test_logger.error(f"Error fetching questions: {e}")
#         return []
#     except requests.exceptions.JSONDecodeError as e:
#         test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
#         return []
#     except Exception as e:
#         test_logger.error(f"An unexpected error occurred fetching questions: {e}")
#         return []
#
# class TestAppQuestions:
#     """Test cases for questions from the app."""
#
#     def test_first_app_question(self, agent, questions_data):
#         """Test the agent's response to the first app question."""
#         if not questions_data:
#             pytest.skip("No questions available from API")
#
#         first_question = questions_data[0]
#         question_text = first_question.get("question")
#         task_id = first_question.get("task_id")
#
#         if not question_text or not task_id:
#             pytest.skip("First question is missing required fields")
#
#         test_logger.info(f"Testing with app question: {question_text}")
#
#         response = agent(question_text)
#         test_logger.info(f"Agent response: {response}")
#
#         # Check that the response contains the expected information
#         assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
#         assert "studio albums" in response.lower(), "Response should mention studio albums"
#         assert "2000" in response and "2009" in response, "Response should mention the year range"
#
#         # Verify that a number is mentioned (either as word or digit)
#         import re
#         number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
#         has_number = bool(re.search(number_pattern, response.lower()))
#         assert has_number, "Response should include the number of albums"
#
#         # Check for album names in the response
#         known_albums = [
#             "Corazón Libre",
#             "Cantora",
#             "Hermano",
#             "Acústico",
#             "Argentina quiere cantar"
#         ]
#         found_albums = [album for album in known_albums if album in response]
#         assert len(found_albums) > 0, "Response should mention at least some of the known albums"
#
#         # Check for a structured response
#         assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
#             "Response should list albums with years"


class TestBasicCodeAgentCapabilities:
    """Exercise the code agent on simple tasks and inspect its logged steps."""

    # A logged step must carry at least one of these keys.
    _STEP_CONTENT_KEYS = ("thought", "code", "observation")

    def setup_method(self):
        """Create a fresh AgentRunner on ``self.agent`` before each test."""
        test_logger.info("Creating AgentRunner instance")
        self.agent = AgentRunner()

    def test_simple_math_calculation_with_steps(self):
        """Ask for a simple sum; check the answer text and the step logs."""
        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
        test_logger.info(f"Testing math calculation with question: {question}")

        answer = self.agent(question)

        # The expected text comes from evaluating the same sum in Python.
        expected_result = str(5 + 3 + 1294.678)
        assert (
            expected_result in answer
        ), f"Response should contain the result {expected_result}"

        # The runner is expected to expose the state of its last run.
        state = self.agent.last_state
        assert state is not None, "Agent should store last state"
        assert "step_logs" in state, "State should contain step_logs"
        logged_steps = state["step_logs"]
        assert len(logged_steps) > 0, "Should have at least one step logged"

        # Every logged step needs a number and some content.
        for entry in logged_steps:
            assert "step_number" in entry, "Each step should have a step_number"
            assert any(
                name in entry for name in self._STEP_CONTENT_KEYS
            ), "Each step should have at least one of: thought, code, or observation"

        # The response text must reference final_answer.
        assert (
            "final_answer" in answer.lower()
        ), "Response should indicate it's providing an answer"

    def test_document_qa_and_image_generation_with_steps(self):
        """Ask for a search plus an image; check the answer text and step logs."""
        question = (
            "Search for information about the Mona Lisa and generate an image of it."
        )
        test_logger.info(
            f"Testing document QA and image generation with question: {question}"
        )

        answer = self.agent(question)
        answer_lowered = answer.lower()

        # The response has to mention both the subject and the image.
        assert "mona lisa" in answer_lowered, "Response should mention Mona Lisa"
        assert "image" in answer_lowered, "Response should mention image generation"

        # More than one step must have been logged.
        state = self.agent.last_state
        assert state is not None, "Agent should store last state"
        assert "step_logs" in state, "State should contain step_logs"
        steps = state["step_logs"]
        assert len(steps) > 1, "Should have multiple steps logged"

        # Search each step's lower-cased string form for the two keywords.
        rendered_steps = [str(entry).lower() for entry in steps]
        search_hits = sum(1 for text in rendered_steps if "search" in text)
        image_hits = sum(1 for text in rendered_steps if "image" in text)

        assert search_hits > 0, "Should have search steps"
        assert image_hits > 0, "Should have image generation steps"

        # Every logged step needs a number and some content.
        for entry in steps:
            assert "step_number" in entry, "Each step should have a step_number"
            assert any(
                name in entry for name in self._STEP_CONTENT_KEYS
            ), "Each step should have at least one of: thought, code, or observation"


def test_simple_math_calculation_with_steps():
    """Run a simple sum through a new AgentRunner and check steps and answer."""
    runner = AgentRunner()
    answer = runner("What is the result of the following operation: 5 + 3 + 1294.678?")

    # The runner must have stored a state with at least one logged step.
    final_state = runner.last_state
    assert final_state is not None, "Last state should be stored"
    recorded_steps = final_state.get("step_logs", [])
    assert len(recorded_steps) > 0, "Should have recorded step logs"

    # Each step needs a number plus at least one content field.
    for entry in recorded_steps:
        assert "step_number" in entry, "Each step should have a step number"
        has_content = "thought" in entry or "code" in entry or "observation" in entry
        assert (
            has_content
        ), "Each step should have at least one of thought/code/observation"

    # The numeric result must appear in the response, which must also
    # reference final_answer.
    expected_result = 1302.678
    expected_text = str(expected_result)
    assert (
        expected_text in answer
    ), f"Response should contain the result {expected_result}"
    assert (
        "final_answer" in answer.lower()
    ), "Response should indicate it's using final_answer"


def test_document_qa_and_image_generation_with_steps():
    """Ask for a search plus an image and verify the logged steps cover both."""
    runner = AgentRunner()
    answer = runner(
        "Can you search for information about the Mona Lisa and generate an image inspired by it?"
    )

    # The runner must have stored a state with at least one logged step.
    final_state = runner.last_state
    assert final_state is not None, "Last state should be stored"
    recorded_steps = final_state.get("step_logs", [])
    assert len(recorded_steps) > 0, "Should have recorded step logs"

    saw_search = False
    saw_image = False

    for entry in recorded_steps:
        # Structural checks come first, as a failure here aborts the test.
        assert "step_number" in entry, "Each step should have a step number"
        assert any(
            name in entry for name in ("thought", "code", "observation")
        ), "Each step should have at least one of thought/code/observation"

        # Only the thought and code fields are scanned for keywords.
        scanned = (str(entry.get("thought", "")) + str(entry.get("code", ""))).lower()
        saw_search = saw_search or "search" in scanned
        saw_image = saw_image or any(word in scanned for word in ("image", "dalle"))

    assert saw_search, "Should include a search step"
    assert saw_image, "Should include an image generation step"
    assert (
        "final_answer" in answer.lower()
    ), "Response should indicate it's using final_answer"


if __name__ == "__main__":
    # Allow running this file directly as a script. pytest.main() returns the
    # session's exit code rather than exiting the process, so propagate it;
    # otherwise the script would exit with status 0 even when tests fail.
    # Flags: -s disables output capture, -v is verbose, -x stops on the first
    # failure.
    raise SystemExit(pytest.main([__file__, "-s", "-v", "-x"]))