import os
import time
import requests
import pandas as pd
import gradio as gr

# --- Config from Env ---
API_URL  = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-chat-hf")  # Llama 2 has no "-instruct" repo; the chat variant is "-chat-hf"

if not all([SPACE_ID, HF_TOKEN]):
    raise RuntimeError(
        "❌ Please set these in your Space Secrets:\n"
        " • SPACE_ID       (e.g. user/your-space)\n"
        " • HUGGINGFACEHUB_API_TOKEN"
    )

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn the Certificate of Completion,  
and see where you land on the Student Leaderboard!
"""

# --- Simple HF-Inference Agent ---
class GAIAAgent:
    def __init__(self, model_id: str, token: str):
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str) -> str:
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2
            }
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        # data is a list of {generated_text: "..."}
        if isinstance(data, list) and data and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        return str(data)

# --- Gradio callback ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()
    username = profile.username

    # 1) Fetch GAIA questions
    q_resp = requests.get(f"{API_URL}/questions", timeout=15)
    q_resp.raise_for_status()
    questions = q_resp.json() or []
    if not questions:
        return "❌ No questions found. Check your API_URL.", pd.DataFrame()

    # 2) Init agent
    agent = GAIAAgent(MODEL_ID, HF_TOKEN)

    # 3) Answer each
    results = []
    payload = []
    for item in questions:
        tid  = item.get("task_id")
        qtxt = item.get("question","")
        # Some tasks include a file path; you can fetch and append it if you like.
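        # Hedged sketch (assumptions: a "file_name" field on the question and a
        # GET {API_URL}/files/{task_id} endpoint, neither verified here). Uncomment
        # and adapt if your agent should read the attachment before answering:
        #
        # if item.get("file_name"):
        #     f_resp = requests.get(f"{API_URL}/files/{tid}", timeout=30)
        #     f_resp.raise_for_status()
        #     file_bytes = f_resp.content  # parse per file type (csv, xlsx, png, ...)
        #     qtxt += f"\n\n[Attachment '{item['file_name']}' was provided with this task]"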
        try:
            ans = agent.answer(qtxt)
        except Exception as e:
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)  # brief pause between requests to stay under Inference API rate limits

    # 4) Submit
    submission = {
        "username":   username,
        "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
        "answers":    payload
    }
    s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    s_resp.raise_for_status()
    data = s_resp.json()

    # 5) Build status text
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )
    return status, pd.DataFrame(results)

# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    gr.LoginButton()
    run_btn   = gr.Button("▶️ Run GAIA Benchmark")
    status_md = gr.Markdown()
    table_df  = gr.Dataframe(headers=["Task ID","Question","Answer"], wrap=True)

    # No explicit inputs: Gradio injects gr.OAuthProfile into run_and_submit_all
    # automatically, based on its type annotation, once the user has logged in.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_md, table_df]
    )

if __name__ == "__main__":
    demo.launch()