import os
import time

import gradio as gr
import pandas as pd
import requests

# --- Config from Env ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# NOTE(review): confirm this default repo id actually exists on the Hub;
# override it with the MODEL_ID environment variable if it does not.
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")

# Fail fast at import time: nothing below works without these two secrets.
if not all([SPACE_ID, HF_TOKEN]):
    raise RuntimeError(
        "❌ Please set these in your Space Secrets:\n"
        " • SPACE_ID (e.g. user/your-space)\n"
        " • HUGGINGFACEHUB_API_TOKEN"
    )

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn the Certificate of Completion,
and see where you land on the Student Leaderboard!
"""


# --- Simple HF-Inference Agent ---
class GAIAAgent:
    """Minimal client that sends a prompt to the HF Inference API."""

    def __init__(self, model_id: str, token: str):
        """Store the model id and build the bearer-token auth header."""
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str) -> str:
        """Return the model's generated text for *prompt*.

        Raises:
            requests.RequestException: on connection errors, timeouts, or a
                non-2xx HTTP status.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
                # Without this the endpoint echoes the prompt in front of the
                # completion, which would end up in the submitted answer.
                "return_full_text": False,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        # Expected shape: a list of {"generated_text": "..."} dicts.
        if isinstance(data, list) and data:
            first = data[0]
            if isinstance(first, dict) and "generated_text" in first:
                return str(first["generated_text"]).strip()
        # Unexpected payload (e.g. an error dict): surface it verbatim.
        return str(data)


# --- Gradio callback ---
def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame]:
    """Fetch the questions, answer each one, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_markdown, results_table)`` pair. Network failures while
        fetching or submitting are reported in the status text instead of
        being raised; on a failed submission the computed answers are still
        returned in the table.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()
    username = profile.username

    # 1) Fetch GAIA questions
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except (requests.RequestException, ValueError) as e:
        return f"❌ Could not fetch questions: {e}", pd.DataFrame()
    if not questions:
        return "❌ No questions found. Check your API_URL.", pd.DataFrame()

    # 2) Init agent
    agent = GAIAAgent(MODEL_ID, HF_TOKEN)

    # 3) Answer each
    results = []
    payload = []
    for item in questions:
        tid = item.get("task_id")
        qtxt = item.get("question", "")
        # Some tasks include a file path; you can fetch and append it if you like.
        try:
            ans = agent.answer(qtxt)
        except Exception as e:
            # Deliberately broad: one failing question must not abort the run,
            # so the error text is recorded as that task's answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)  # brief pause between inference calls

    results_df = pd.DataFrame(results)

    # 4) Submit
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
        "answers": payload,
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except (requests.RequestException, ValueError) as e:
        # Keep the answers visible even though the submission did not go through.
        return f"❌ Submission failed: {e}", results_df

    # 5) Build status text
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )
    return status, results_df


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status_md = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # No `inputs`: Gradio injects the gr.OAuthProfile argument itself based on
    # the callback's type annotation; listing the login button as an input
    # would pass an extra positional argument to the callback.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_md, table_df],
    )

if __name__ == "__main__":
    demo.launch()