# app.py
"""Gradio Space that evaluates a smolagents CodeAgent on the GAIA benchmark.

Fetches the question set from the scoring API, runs the agent on each task,
and submits all answers under the logged-in Hugging Face user's name.
"""

import os
import time

import requests
import pandas as pd
import gradio as gr
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    PythonInterpreterTool,
    InferenceClientModel
)

# --- Configuration ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")  # e.g. "your-username/your-space"
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Hugging Face token

# No need for HF_USERNAME—Gradio OAuthProfile provides it
if not all([SPACE_ID, HF_TOKEN]):
    # Fail fast at import so the Space shows a clear error instead of
    # breaking mid-run during submission.
    raise RuntimeError(
        "Please set the following environment variables in your Space settings:\n"
        " • SPACE_ID\n"
        " • HUGGINGFACEHUB_API_TOKEN"
    )

WELCOME_TEXT = """
## Welcome to the GAIA Benchmark Runner 🎉

This challenge is your final hands-on project:
- Build an agent and evaluate it on a subset of the GAIA benchmark.
- You need **≥30%** to earn your Certificate of Completion. 🏅
- Submit your score and see how you stack up on the Student Leaderboard!
"""


# --- Agent Definition ---
class GAIAAgent:
    """Thin wrapper around a smolagents CodeAgent with search + code tools."""

    def __init__(self, model_id="meta-llama/Llama-3-70B-Instruct"):
        """Build the inference model and the tool-equipped CodeAgent.

        Args:
            model_id: Hugging Face model repo to use for inference.
        """
        # Initialize HF Inference client
        self.model = InferenceClientModel(
            model_id=model_id,
            token=HF_TOKEN,
            provider="hf-inference",
            timeout=120,
            temperature=0.2,
        )
        # Attach search + code execution tools
        tools = [
            DuckDuckGoSearchTool(),
            PythonInterpreterTool(),
        ]
        self.agent = CodeAgent(
            tools=tools,
            model=self.model,
            executor_type="local",
        )

    def answer(self, question: str, task_file: str = None) -> str:
        """Run the agent on *question*, optionally inlining an attached file.

        Args:
            question: The GAIA task prompt.
            task_file: Optional path to a task attachment; its text content
                is appended to the prompt. Unreadable files are skipped
                (best-effort — the bare question is still answered).

        Returns:
            The agent's final answer string.
        """
        prompt = question
        if task_file:
            try:
                with open(task_file, "r", encoding="utf-8") as f:
                    content = f.read()
                prompt += f"\n\nAttached file:\n```\n{content}\n```"
            except (OSError, UnicodeDecodeError):
                # Deliberate best-effort: a missing/binary attachment should
                # not abort the whole benchmark run. (Was a bare `except:`,
                # which also swallowed KeyboardInterrupt/SystemExit.)
                pass
        return self.agent.run(prompt)


# --- Runner & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer them, and submit the results.

    Args:
        profile: OAuth profile auto-injected by Gradio when the user is
            logged in; None when logged out.

    Returns:
        (status_markdown, results_dataframe) for the two output components.

    Raises:
        requests.HTTPError: If the questions fetch or submission fails.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()
    username = profile.username

    # 1) Fetch GAIA questions
    q_resp = requests.get(f"{API_URL}/questions", timeout=15)
    q_resp.raise_for_status()
    questions = q_resp.json() or []
    if not questions:
        return "❌ No questions returned; check your API_URL.", pd.DataFrame()

    # 2) Initialize your agent
    agent = GAIAAgent()

    # 3) Run agent on each question
    results, payload = [], []
    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question", "")
        file_path = item.get("task_file_path")  # optional
        try:
            answer = agent.answer(question, file_path)
        except Exception as e:
            # Record the failure as the answer so one bad task doesn't
            # abort the whole submission.
            answer = f"ERROR: {e}"
        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer
        })
        payload.append({
            "task_id": task_id,
            "submitted_answer": answer
        })
        time.sleep(0.5)  # throttle requests

    # 4) Submit all answers
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
        "answers": payload
    }
    s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    s_resp.raise_for_status()
    data = s_resp.json()

    # 5) Build status message
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )
    return status, pd.DataFrame(results)


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME_TEXT)
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run Benchmark & Submit")
    status_out = gr.Markdown()
    table_out = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # NOTE: `gr.OAuthProfile` parameters are auto-injected by Gradio based on
    # the type annotation and must NOT be listed in `inputs`. The original
    # `inputs=[login]` passed the LoginButton value as an extra positional
    # argument, causing an argument mismatch on click.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=None,
        outputs=[status_out, table_out]
    )

if __name__ == "__main__":
    demo.launch()