# app.py
"""Gradio front-end that answers GAIA benchmark questions and submits them.

Flow: the user logs in with Hugging Face, the app fetches the question list
from ``API_URL``, asks a hosted model (``MODEL_ID``) for an answer to each
question through the HF Inference API, and posts all answers to the scoring
endpoint.
"""
import os
import time
import traceback

import gradio as gr
import pandas as pd
import requests

# --- Config from Env ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN_ENV = os.getenv("HUGGINGFACEHUB_API_TOKEN")

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""


class GAIAAgent:
    """Minimal agent that forwards a prompt to the HF Inference API."""

    def __init__(self, model_id: str, token: str):
        """Store the model id and build the bearer-token auth header."""
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")  # debug
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str) -> str:
        """Send *prompt* to the model and return its generated text.

        Returns the stripped ``generated_text`` of the first result when the
        response is a non-empty list of dicts; otherwise the raw response is
        returned as a string.

        Raises:
            requests.RequestException: on network failure, timeout, or a
                non-2xx HTTP status.
        """
        print(f"[DEBUG] Sending prompt of length {len(prompt)} to HF Inference")  # debug
        payload = {
            "inputs": prompt,
            "parameters": {"max_new_tokens": 512, "temperature": 0.2},
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        print(f"[DEBUG] Got response from model: {data!r}")  # debug
        # Guard on dict: `in` on a str would be a substring test and the
        # subsequent indexing would raise TypeError.
        if (
            isinstance(data, list)
            and data
            and isinstance(data[0], dict)
            and "generated_text" in data[0]
        ):
            return data[0]["generated_text"].strip()
        return str(data)


def _resolve_hf_token(profile, oauth_token):
    """Return ``(token, source)``; token is falsy when none is available.

    Precedence: environment secret, then the OAuth token of the logged-in
    user, then the legacy ``access_token`` attribute lookup on the profile.
    """
    if HF_TOKEN_ENV:
        return HF_TOKEN_ENV, "env"
    oauth_value = getattr(oauth_token, "token", None)
    if oauth_value:
        # NOTE(review): the OAuth token presumably needs an inference scope
        # configured for the Space to be accepted by the Inference API - confirm.
        return oauth_value, "oauth"
    return getattr(profile, "access_token", None), "profile"


def _fetch_questions() -> list:
    """Download the question list from the scoring API (empty list if none)."""
    print(f"[DEBUG] Fetching questions from {API_URL}/questions")  # debug
    q_resp = requests.get(f"{API_URL}/questions", timeout=15)
    q_resp.raise_for_status()
    questions = q_resp.json() or []
    print(f"[DEBUG] Received {len(questions)} questions")  # debug
    return questions


def _answer_questions(agent: GAIAAgent, questions) -> tuple[list, list]:
    """Answer every well-formed question.

    Returns ``(results, payload)``: display rows for the results table and
    the answer entries for the submission. A failure on a single question is
    recorded as an ``ERROR: ...`` answer so the run continues.
    """
    results = []
    payload = []
    for item in questions:
        tid = item.get("task_id") if isinstance(item, dict) else None
        qtxt = item.get("question") if isinstance(item, dict) else None
        print(f"[DEBUG] Processing task_id={tid}")  # debug
        if not tid or qtxt is None:
            # An entry without an id cannot be attributed to a task.
            print(f"[DEBUG] Skipping malformed question entry: {item!r}")  # debug
            continue
        try:
            ans = agent.answer(qtxt)
        except Exception as e:  # best-effort: keep going on per-question errors
            ans = f"ERROR: {e}"
            print(f"[DEBUG] Error answering: {e}")  # debug
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)  # short pause between model calls (presumably rate limiting)
    return results, payload


def _submit_answers(username: str, payload: list) -> dict:
    """POST the answers to the scoring API and return its JSON response."""
    print(f"[DEBUG] Submitting payload with {len(payload)} answers")  # debug
    submission = {"username": username, "answers": payload}
    s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    s_resp.raise_for_status()
    data = s_resp.json()
    print(f"[DEBUG] Submission response: {data!r}")  # debug
    return data


def run_and_submit_all(
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None = None,
) -> tuple[str, pd.DataFrame]:
    """Run the agent over all GAIA questions and submit the answers.

    Both parameters are filled in by Gradio from their type hints when the
    function is used as an event handler; they must not be listed in the
    event's ``inputs``.

    Args:
        profile: Profile of the logged-in user, or ``None`` when logged out.
        oauth_token: OAuth token of the logged-in user, used for inference
            when ``HUGGINGFACEHUB_API_TOKEN`` is not set.

    Returns:
        ``(status_markdown, results_dataframe)``. On any failure the status
        carries the error message and the dataframe is empty; no exception
        propagates to the UI.
    """
    try:
        print("[DEBUG] run_and_submit_all called")  # debug
        if profile is None:
            print("[DEBUG] No profile provided")  # debug
            return ("⚠️ Please log in with your Hugging Face account.", pd.DataFrame())

        username = profile.username
        print(f"[DEBUG] Logged in as: {username}")  # debug

        hf_token, token_source = _resolve_hf_token(profile, oauth_token)
        if not hf_token:
            print("[DEBUG] No HF token found")  # debug
            return (
                "❌ No Hugging Face token found.\n"
                "Set HUGGINGFACEHUB_API_TOKEN in Secrets or log in via the button.",
                pd.DataFrame(),
            )
        print(f"[DEBUG] Using HF token from {token_source}")  # debug

        # 1) Fetch GAIA questions
        questions = _fetch_questions()
        if not questions:
            return ("❌ No questions found. Check your API_URL.", pd.DataFrame())

        # 2) Init agent
        agent = GAIAAgent(MODEL_ID, hf_token)

        # 3) Answer each
        results, payload = _answer_questions(agent, questions)
        if not payload:
            return ("❌ No valid questions to answer.", pd.DataFrame())

        # 4) Submit
        data = _submit_answers(username, payload)

        # 5) Build status text
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:  # top-level UI boundary: report instead of crashing
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return (f"❌ Unexpected error:\n{e}", pd.DataFrame())


with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # No `inputs`: Gradio injects the OAuth profile/token from the handler's
    # type hints. Passing the login button as an input would add an extra
    # positional argument the handler does not accept.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, table_df],
    )

if __name__ == "__main__":
    demo.launch()