# app.py
"""Gradio app that answers the GAIA questions with a simple HF-Inference agent.

The app fetches questions from ``API_URL``, asks a hosted model to answer each
one, submits the answers to ``API_URL``/submit and displays the returned score.
"""
import os
import time

import requests
import pandas as pd
import gradio as gr

# --- Config from Env ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN_ENV = os.getenv("HUGGINGFACEHUB_API_TOKEN")

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""


# --- Simple HF-Inference Agent ---
class GAIAAgent:
    """Minimal agent that forwards a prompt to the HF Inference API."""

    def __init__(self, model_id: str, token: str):
        """Store the model id and build the bearer-token auth header."""
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str) -> str:
        """Return the model's generated text for *prompt*.

        If the response is not a non-empty list whose first element contains
        ``generated_text``, the raw JSON payload is returned as a string.

        Raises:
            requests.HTTPError: if the inference endpoint returns an error
                status (other ``requests`` exceptions may propagate too).
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list) and data and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        return str(data)


def _fetch_questions() -> list:
    """Download the question list from the scoring API (empty list if none)."""
    q_resp = requests.get(f"{API_URL}/questions", timeout=15)
    q_resp.raise_for_status()
    return q_resp.json() or []


def _answer_all(agent: GAIAAgent, questions: list) -> tuple[list, list]:
    """Answer every question; return (display rows, submission payload)."""
    results = []
    payload = []
    for item in questions:
        tid = item.get("task_id")
        qtxt = item.get("question", "")
        try:
            ans = agent.answer(qtxt)
        except Exception as e:
            # Deliberate best-effort: one failing question must not abort the
            # whole run, so the error text is submitted as the answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)  # short pause between inference calls
    return results, payload


def _format_status(data: dict) -> str:
    """Build the Markdown status text from the scoring API response."""
    return (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )


# --- Gradio callback ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on all questions, submit the answers, report the score.

    Args:
        profile: the logged-in user's OAuth profile, or ``None`` when the
            visitor is not logged in.

    Returns:
        A ``(status_markdown, results_dataframe)`` pair. On failure the status
        carries the error message; the dataframe is empty unless answers were
        already computed.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    username = profile.username
    # NOTE(review): gr.OAuthProfile may not expose `access_token` (Gradio
    # documents gr.OAuthToken for that) -- confirm; the env token is preferred.
    hf_token = HF_TOKEN_ENV or getattr(profile, "access_token", None)
    if not hf_token:
        return (
            "❌ No Hugging Face token found.\n"
            "Set HUGGINGFACEHUB_API_TOKEN in Secrets or log in via the button.",
            pd.DataFrame()
        )

    # 1) Fetch GAIA questions
    try:
        questions = _fetch_questions()
    except (requests.RequestException, ValueError) as err:
        return f"❌ Could not fetch questions: {err}", pd.DataFrame()
    if not questions:
        return "❌ No questions found. Check your API_URL.", pd.DataFrame()

    # 2) Init agent
    agent = GAIAAgent(MODEL_ID, hf_token)

    # 3) Answer each
    results, payload = _answer_all(agent, questions)
    results_df = pd.DataFrame(results)

    # 4) Submit (no agent_code)
    submission = {
        "username": username,
        "answers": payload,
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except (requests.RequestException, ValueError) as err:
        # Keep the computed answers visible even though scoring failed.
        return f"❌ Submission failed: {err}", results_df

    # 5) Build status text
    return _format_status(data), results_df


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # No `inputs`: Gradio injects the OAuth profile from the callback's type
    # hint; listing the login button would pass an extra positional argument.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, table_df]
    )

if __name__ == "__main__":
    demo.launch()