"""End-to-end tests for ``AgentRunner``.

Each test sends a natural-language question to a freshly constructed
``AgentRunner``, then checks both the returned text and the ``step_logs``
entries the runner leaves in ``last_state``.

The same two scenarios (a math calculation and a search + image request) are
covered twice: once as methods of ``TestBasicCodeAgentCapabilities`` and once
as module-level test functions with slightly different prompts and messages.
"""

import logging

import pytest
import requests
from langgraph.types import Command

from agent import AgentRunner

# Configure test logger
test_logger = logging.getLogger("test_agent")
test_logger.setLevel(logging.INFO)

# Suppress specific warnings
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"


@pytest.fixture(scope="session")
def agent():
    """Fixture to create and return an AgentRunner instance."""
    test_logger.info("Creating AgentRunner instance")
    return AgentRunner()


# NOTE: The fixture and test class below are intentionally disabled. They are
# kept for reference; ``requests`` and ``QUESTIONS_URL`` are only used here.
#
# @pytest.fixture(scope="session")
# def questions_data():
#     """Fixture to fetch questions from the API."""
#     test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
#     try:
#         response = requests.get(QUESTIONS_URL, timeout=15)
#         response.raise_for_status()
#         data = response.json()
#         if not data:
#             test_logger.error("Fetched questions list is empty.")
#             return []
#         test_logger.info(f"Fetched {len(data)} questions.")
#         return data
#     except requests.exceptions.RequestException as e:
#         test_logger.error(f"Error fetching questions: {e}")
#         return []
#     except requests.exceptions.JSONDecodeError as e:
#         test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
#         return []
#     except Exception as e:
#         test_logger.error(f"An unexpected error occurred fetching questions: {e}")
#         return []
#
#
# class TestAppQuestions:
#     """Test cases for questions from the app."""
#
#     def test_first_app_question(self, agent, questions_data):
#         """Test the agent's response to the first app question."""
#         if not questions_data:
#             pytest.skip("No questions available from API")
#
#         first_question = questions_data[0]
#         question_text = first_question.get("question")
#         task_id = first_question.get("task_id")
#
#         if not question_text or not task_id:
#             pytest.skip("First question is missing required fields")
#
#         test_logger.info(f"Testing with app question: {question_text}")
#
#         response = agent(question_text)
#         test_logger.info(f"Agent response: {response}")
#
#         # Check that the response contains the expected information
#         assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
#         assert "studio albums" in response.lower(), "Response should mention studio albums"
#         assert "2000" in response and "2009" in response, "Response should mention the year range"
#
#         # Verify that a number is mentioned (either as word or digit)
#         import re
#         number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
#         has_number = bool(re.search(number_pattern, response.lower()))
#         assert has_number, "Response should include the number of albums"
#
#         # Check for album names in the response
#         known_albums = [
#             "Corazón Libre",
#             "Cantora",
#             "Hermano",
#             "Acústico",
#             "Argentina quiere cantar"
#         ]
#         found_albums = [album for album in known_albums if album in response]
#         assert len(found_albums) > 0, "Response should mention at least some of the known albums"
#
#         # Check for a structured response
#         assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
#             "Response should list albums with years"


class TestBasicCodeAgentCapabilities:
    """Test basic capabilities of the code agent.

    A new ``AgentRunner`` is built in ``setup_method`` before every test, so
    the tests in this class do not share agent state with one another.
    """

    def setup_method(self):
        """Setup method to initialize the agent before each test."""
        test_logger.info("Creating AgentRunner instance")
        self.agent = AgentRunner()

    def test_simple_math_calculation_with_steps(self):
        """Test that the agent can perform basic math calculations and log steps."""
        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
        test_logger.info(f"Testing math calculation with question: {question}")

        # Run the agent and get the response
        response = self.agent(question)

        # Verify the response contains the correct result. The expected text
        # is computed with the same expression the question asks about.
        expected_result = str(5 + 3 + 1294.678)
        assert (
            expected_result in response
        ), f"Response should contain the result {expected_result}"

        # Verify step logs exist and have required fields
        assert self.agent.last_state is not None, "Agent should store last state"
        assert "step_logs" in self.agent.last_state, "State should contain step_logs"
        assert (
            len(self.agent.last_state["step_logs"]) > 0
        ), "Should have at least one step logged"

        # Verify each step has required fields
        for step in self.agent.last_state["step_logs"]:
            assert "step_number" in step, "Each step should have a step_number"
            assert any(
                key in step for key in ["thought", "code", "observation"]
            ), "Each step should have at least one of: thought, code, or observation"

        # Verify the final answer is indicated
        assert (
            "final_answer" in response.lower()
        ), "Response should indicate it's providing an answer"

    def test_document_qa_and_image_generation_with_steps(self):
        """Test that the agent can search for information and generate images, with step logging."""
        question = (
            "Search for information about the Mona Lisa and generate an image of it."
        )
        test_logger.info(
            f"Testing document QA and image generation with question: {question}"
        )

        # Run the agent and get the response
        response = self.agent(question)

        # Verify the response contains both search and image generation
        assert "mona lisa" in response.lower(), "Response should mention Mona Lisa"
        assert "image" in response.lower(), "Response should mention image generation"

        # Verify step logs exist and show logical progression
        assert self.agent.last_state is not None, "Agent should store last state"
        assert "step_logs" in self.agent.last_state, "State should contain step_logs"
        assert (
            len(self.agent.last_state["step_logs"]) > 1
        ), "Should have multiple steps logged"

        # Verify steps show logical progression. Matching is done on the
        # string form of the whole step, so any field mentioning the keyword
        # counts.
        steps = self.agent.last_state["step_logs"]
        search_steps = [step for step in steps if "search" in str(step).lower()]
        image_steps = [step for step in steps if "image" in str(step).lower()]

        assert len(search_steps) > 0, "Should have search steps"
        assert len(image_steps) > 0, "Should have image generation steps"

        # Verify each step has required fields
        for step in steps:
            assert "step_number" in step, "Each step should have a step_number"
            assert any(
                key in step for key in ["thought", "code", "observation"]
            ), "Each step should have at least one of: thought, code, or observation"


def test_simple_math_calculation_with_steps():
    """Test that the agent can perform a simple math calculation and verify intermediate steps."""
    agent = AgentRunner()
    question = "What is the result of the following operation: 5 + 3 + 1294.678?"

    # Process the question
    response = agent(question)

    # Verify step logs exist and have required fields. A missing "step_logs"
    # key is treated as an empty list and fails the length check below.
    assert agent.last_state is not None, "Last state should be stored"
    step_logs = agent.last_state.get("step_logs", [])
    assert len(step_logs) > 0, "Should have recorded step logs"

    for step in step_logs:
        assert "step_number" in step, "Each step should have a step number"
        assert any(
            key in step for key in ["thought", "code", "observation"]
        ), "Each step should have at least one of thought/code/observation"

    # Verify final answer
    expected_result = 1302.678
    assert (
        str(expected_result) in response
    ), f"Response should contain the result {expected_result}"
    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"


def test_document_qa_and_image_generation_with_steps():
    """Test document QA and image generation with step verification."""
    agent = AgentRunner()
    question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"

    # Process the question
    response = agent(question)

    # Verify step logs exist and demonstrate logical progression
    assert agent.last_state is not None, "Last state should be stored"
    step_logs = agent.last_state.get("step_logs", [])
    assert len(step_logs) > 0, "Should have recorded step logs"

    # Check for search and image generation steps
    has_search_step = False
    has_image_step = False

    for step in step_logs:
        assert "step_number" in step, "Each step should have a step number"
        assert any(
            key in step for key in ["thought", "code", "observation"]
        ), "Each step should have at least one of thought/code/observation"

        # Look for search and image steps in thoughts or code (observations
        # are deliberately not inspected here).
        step_content = str(step.get("thought", "")) + str(step.get("code", ""))
        if "search" in step_content.lower():
            has_search_step = True
        if "image" in step_content.lower() or "dalle" in step_content.lower():
            has_image_step = True

    assert has_search_step, "Should include a search step"
    assert has_image_step, "Should include an image generation step"
    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"


if __name__ == "__main__":
    # Run this file directly: no output capture (-s), verbose (-v), and stop
    # at the first failure (-x).
    pytest.main([__file__, "-s", "-v", "-x"])