# app.py
"""Gradio Space that runs the GAIA benchmark.

Fetches the GAIA question set from the course scoring API, answers each
question with a hosted HF Inference model, submits all answers, and shows
the resulting score plus a per-question table in a Gradio UI.
"""

import os
import time
import traceback

import gradio as gr
import pandas as pd
import requests

# ─── Configuration ──────────────────────────────────────────────────────────
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not HF_TOKEN:
    # Fail fast at startup: nothing below works without a token.
    raise RuntimeError(
        "❌ Please set HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""


# ─── Utility to fetch your HF username from the token ────────────────────────
def get_hf_username():
    """Return the HF username associated with HF_TOKEN, or None.

    Best-effort: any network/auth/parse failure is logged and swallowed so
    the caller can surface a friendly message instead of crashing the UI.
    """
    try:
        resp = requests.get(
            "https://huggingface.co/api/whoami-v2", headers=HEADERS, timeout=10
        )
        resp.raise_for_status()
        data = resp.json()
        # whoami-v2 may nest the user ({"user": {"username": ...}}) or expose
        # a top-level "name" — accept either shape.
        return data.get("user", {}).get("username") or data.get("name")
    except Exception as e:  # deliberate best-effort: report failure via None
        print("[DEBUG] whoami failed:", e)
        return None


# ─── Simple HF-Inference Agent ──────────────────────────────────────────────
class GAIAAgent:
    """Minimal agent that answers a prompt via the HF Inference API."""

    def __init__(self, model_id: str):
        print(f"[DEBUG] Initializing with model {model_id}")
        self.model_id = model_id
        self.headers = HEADERS

    def answer(self, prompt: str) -> str:
        """Send *prompt* to the hosted model and return the generated text.

        Raises:
            requests.HTTPError: on a non-2xx response from the API.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
                # Without this the text-generation task echoes the prompt at
                # the start of "generated_text"; we want only the answer.
                "return_full_text": False,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list) and data and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        # Unexpected shape (e.g. {"error": ...}) — surface it verbatim.
        return str(data)


# ─── Gradio callback ─────────────────────────────────────────────────────────
def run_and_submit_all():
    """Run the full benchmark: fetch questions, answer, submit, report.

    Returns:
        tuple[str, pd.DataFrame]: a Markdown status message and a table of
        (Task ID, Question, Answer) rows. On any failure the message
        describes the error and the table is empty.
    """
    try:
        # 0) Resolve username
        username = get_hf_username()
        if not username:
            return (
                "❌ Could not fetch your HF username. Check your token.",
                pd.DataFrame(),
            )

        # 1) Fetch GAIA questions
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "❌ No questions returned; check your API_URL.", pd.DataFrame()

        # 2) Init agent
        agent = GAIAAgent(MODEL_ID)

        # 3) Answer each question; per-question failures become ERROR rows
        #    so one bad item does not abort the whole run.
        results = []
        payload = []
        for item in questions:
            tid = item.get("task_id")
            qtxt = item.get("question", "")
            try:
                ans = agent.answer(qtxt)
            except Exception as e:
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)  # gentle pacing to avoid rate limits

        # 4) Submit all answers
        # NOTE(review): some versions of the scoring API also expect an
        # "agent_code" field in this payload — verify against the API spec.
        submission = {
            "username": username,
            "answers": payload,
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()

        # 5) Build status text
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        # Top-level boundary: log the full traceback, show a short message.
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return (
            f"❌ Unexpected error:\n{e}\n\nSee logs for details.",
            pd.DataFrame(),
        )


# ─── Gradio UI ───────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    run_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status, table_df],
    )

if __name__ == "__main__":
    demo.launch()