"""Gradio app that runs a single-step agent over the scoring API's questions.

The app fetches questions from ``DEFAULT_API_URL``, answers each one with
``BasicAgent``, and posts the collected answers back to the ``/submit``
endpoint. A second button runs the agent on one random question.
"""

import os

import gradio as gr
import pandas as pd
import requests
from smolagents import CodeAgent, OpenAIServerModel

from tools import AnswerTool

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


class BasicAgent:
    """Callable wrapper around a ``CodeAgent`` that exposes only ``AnswerTool``."""

    def __init__(self):
        # Initialize CodeAgent with GPT-4o and only the custom AnswerTool.
        model = OpenAIServerModel(model_id="gpt-4o")
        answer_tool = AnswerTool()
        self.agent = CodeAgent(
            model=model,
            tools=[answer_tool],
            add_base_tools=False,
            max_steps=1,
            verbosity_level=0,
        )

    def __call__(self, question: str) -> str:
        """Run the agent once on *question* and return whatever it produces."""
        # Single-step invocation of AnswerTool.
        return self.agent.run(question)


def _submittable(answer):
    """Return *answer* unchanged if it is a str/int/float, else ``str(answer)``.

    The answers are sent with ``requests.post(..., json=...)``; a single
    non-serializable value would otherwise make the whole submission fail.
    """
    if isinstance(answer, (str, int, float)):
        return answer
    return str(answer)


def run_and_submit_all(username):
    """Answer every question from the API and submit the results.

    Args:
        username: Hugging Face username typed by the user in the UI.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message;
        ``table`` is a ``pandas.DataFrame`` with one row per answered
        question, or ``None`` when the run stopped before any question was
        processed.
    """
    # Username is provided manually by the user; ignore surrounding whitespace
    # so that a blank-looking entry is rejected and a padded one is cleaned up.
    username = (username or "").strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # Fetch questions.
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", None
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        return f"Error fetching questions: {e}", None

    # The loop below expects a list of dicts; anything else is a bad response.
    if not isinstance(questions, list):
        return "Error fetching questions: unexpected response format.", None

    # Building the agent can fail (e.g. model client setup); surface that as a
    # status message like every other failure in this function.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Run agent on all questions.
    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid = q.get('task_id')
        text = q.get('question')
        if not (tid and text):
            continue
        try:
            ans = _submittable(agent(text))
        except Exception as e:  # One failing question must not abort the run.
            ans = f"ERROR: {e}"
        results.append({'Task ID': tid, 'Question': text, 'Answer': ans})
        payload.append({'task_id': tid, 'submitted_answer': ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # Submit answers.
    # NOTE(review): when SPACE_ID is unset the URL contains the text "None";
    # confirm whether the scoring service needs a real link here.
    submission = {
        'username': username,
        'agent_code': f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        'answers': payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:  # UI boundary: show the failure next to the table.
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(username):
    """Fetch one random question and run the agent on it.

    Args:
        username: Hugging Face username typed by the user in the UI; only
            checked for being non-empty.

    Returns:
        A ``(question, answer)`` tuple for the two output text boxes. On any
        failure the first element carries the error message and the second
        is an empty string.
    """
    if not (username or "").strip():
        return "Please enter your Hugging Face username.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", ""
        # Without this, an HTTP error body would be parsed as if it were a question.
        resp.raise_for_status()
        question = resp.json().get('question', '')
        if not question:
            # Avoid spending a model call on an empty prompt.
            return "Error during test: no question returned by the server.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:  # UI boundary: report instead of crashing.
        return f"Error during test: {e}", ""


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Enter your Hugging Face username.
        2. Use **Test Random Question** to check a single question.
        3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
        """
    )

    username_input = gr.Textbox(label="Hugging Face Username", placeholder="your-username")
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    run_btn.click(fn=run_and_submit_all, inputs=[username_input], outputs=[status_out, table_out])
    test_btn.click(fn=test_random_question, inputs=[username_input], outputs=[question_out, answer_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)