# app.py
"""Gradio runner that evaluates a smolagents ``CodeAgent`` on the course questions.

The app fetches questions from the scoring API, optionally downloads the file
attached to each task, lets the agent answer, and submits all answers.
"""
import logging
import os
import re
import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd
import requests
from smolagents import CodeAgent, OpenAIServerModel
from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool

from tools import AnswerTool, SpeechToTextTool, ExcelToTextTool

logger = logging.getLogger(__name__)

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Matches both `filename="report.xlsx"` and the unquoted `filename=report.xlsx`.
_FILENAME_RE = re.compile(r'filename="?([^";]+)"?')

# Names that must never be used as the final path component.
_UNUSABLE_NAMES = ("", ".", "..")

# Kept flush-left so the Markdown renders the same regardless of how the
# UI component treats leading whitespace.
_INSTRUCTIONS_MD = """
**Instructions:**
1. Enter your Hugging Face username.
2. Use **Test Random Question** to check a single question.
3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
"""


def _basename_only(name: str) -> str:
    """Return the last path component of *name*, treating ``\\`` like ``/``."""
    return Path(name.replace("\\", "/")).name


def _safe_filename(content_disposition: str, fallback: str) -> str:
    """Pick a local file name from a Content-Disposition header value.

    The header comes from the network, so the result is reduced to a bare
    file name (no directory components) before it is joined onto a local
    directory. If the header carries no usable name, *fallback* is used
    instead; if that is unusable as well, ``"download"`` is returned.
    """
    match = _FILENAME_RE.search(content_disposition)
    if match:
        candidate = _basename_only(match.group(1).strip())
        if candidate not in _UNUSABLE_NAMES:
            return candidate

    candidate = _basename_only(str(fallback))
    return candidate if candidate not in _UNUSABLE_NAMES else "download"


def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
    """Download the file attached to a task, if there is one.

    Issues ``GET {base_api_url}/files/{task_id}``.

    Args:
        base_api_url: Root URL of the scoring API (no trailing slash).
        task_id: Identifier of the task whose attachment is requested.

    Returns:
        The local path of the saved file, or ``None`` when the server
        answers 404 (no file for this task).

    Raises:
        requests.exceptions.RequestException: On any other HTTP error status
            or on a transport failure, so the caller can log / handle it.
    """
    url = f"{base_api_url}/files/{task_id}"
    resp = requests.get(url, timeout=30)
    if resp.status_code == 404:
        return None
    resp.raise_for_status()

    # Determine filename from headers or default to task_id.
    filename = _safe_filename(resp.headers.get("content-disposition", ""), task_id)

    # Save to temp dir.
    tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    file_path = tmp_dir / filename
    file_path.write_bytes(resp.content)
    return str(file_path)


class BasicAgent:
    """Callable wrapper around a ``CodeAgent`` configured for the evaluation."""

    def __init__(self):
        # Initialize CodeAgent with GPT-4o, file/audio/excel, web and wiki
        # tools, plus final answer.
        model = OpenAIServerModel(model_id="gpt-4o")
        answer_tool = AnswerTool()
        speech_tool = SpeechToTextTool()
        excel_tool = ExcelToTextTool()
        web_tool = DuckDuckGoSearchTool()
        wiki_tool = WikipediaSearchTool()
        self.agent = CodeAgent(
            model=model,
            tools=[speech_tool, excel_tool, wiki_tool, web_tool, answer_tool],
            add_base_tools=False,
            additional_authorized_imports=["pandas", "openpyxl"],
            max_steps=4,
            planning_interval=1,
            verbosity_level=1,
        )

    def __call__(self, task_id: str, question: str) -> str:
        """Answer *question*, telling the agent about the task's file if any.

        Args:
            task_id: Task identifier used to look up an attached file.
            question: The question text passed to the agent.

        Returns:
            Whatever ``self.agent.run`` returns for the built prompt.
        """
        # Pre-fetch any file for this task. This is best effort: a failed
        # download must not prevent the agent from attempting the question,
        # but it is logged rather than silently discarded.
        file_path = None
        try:
            file_path = download_file_if_any(DEFAULT_API_URL, task_id)
        except Exception:
            logger.warning(
                "Could not download a file for task %s; continuing without it.",
                task_id,
                exc_info=True,
            )

        # Build prompt including file context if any.
        if file_path:
            prompt = (
                f"{question}\n\n---\n"
                f"A file for this task was downloaded and saved at: {file_path}\n---"
            )
        else:
            prompt = question

        return self.agent.run(prompt)


def run_and_submit_all(username):
    """Run the agent on every question and submit the answers.

    Args:
        username: Hugging Face username to submit under.

    Returns:
        A ``(status, table)`` pair: a status message for the UI and a
        DataFrame of per-task results (``None`` when nothing was run).
    """
    if not username:
        return "Please enter your Hugging Face username.", None

    # 1. Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", None
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 2. Instantiate agent. Construction can fail (e.g. model client setup),
    # so report it through the status box like every other failure here.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    results = []
    payload = []

    # 3. Run agent on all questions
    for q in questions:
        if not isinstance(q, dict):
            # Skip malformed entries instead of crashing on `.get`.
            continue
        tid = q.get("task_id")
        text = q.get("question")
        if not (tid and text):
            continue
        try:
            ans = agent(tid, text)
        except Exception as e:
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": text, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # 4. Submit answers
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        logger.warning("SPACE_ID is not set; the submitted agent_code link will be invalid.")
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(username):
    """Fetch one random question and run the agent on it.

    Args:
        username: Hugging Face username; only checked for being non-empty.

    Returns:
        A ``(question, answer)`` pair for the UI. On failure the first item
        carries the error message and the second is an empty string.
    """
    if not username:
        return "Please enter your Hugging Face username.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Without this check an HTTP error body would be parsed as a question.
        resp.raise_for_status()
        q = resp.json()
        question = q.get("question", "")
        if not question:
            return "Error during test: the API returned no question.", ""
        ans = BasicAgent()(q.get("task_id"), question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(_INSTRUCTIONS_MD)
    username_input = gr.Textbox(label="Hugging Face Username", placeholder="your-username")
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")
    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    run_btn.click(fn=run_and_submit_all, inputs=[username_input], outputs=[status_out, table_out])
    test_btn.click(fn=test_random_question, inputs=[username_input], outputs=[question_out, answer_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)