File size: 4,268 Bytes
10e9b7d
6a38a35
eccf8e4
6a38a35
bee5328
6a38a35
bee5328
6a38a35
 
bee5328
 
6a38a35
0e6388c
fd5a08b
 
6a38a35
c396a92
 
fd5a08b
c396a92
 
 
 
 
 
 
 
 
41085c3
fd5a08b
6a38a35
 
 
 
bee5328
6a38a35
 
c396a92
fd5a08b
 
6a38a35
 
bee5328
c396a92
6a38a35
c396a92
 
 
 
 
6a38a35
 
c396a92
6a38a35
fd5a08b
c396a92
 
 
 
 
 
 
 
 
 
 
 
41085c3
6a38a35
c396a92
 
 
 
 
6a38a35
c396a92
 
 
6a38a35
 
 
 
 
0e6388c
6a38a35
 
 
 
 
 
 
 
c396a92
6a38a35
c396a92
fd5a08b
c396a92
 
6a38a35
 
c396a92
6a38a35
c396a92
 
6a38a35
c396a92
 
6a38a35
 
 
 
 
 
ee4d1e5
c396a92
fd5a08b
c396a92
6a38a35
60f0482
6a38a35
 
fd5a08b
c396a92
 
6a38a35
fd5a08b
c396a92
fd5a08b
 
6a38a35
c396a92
 
 
6a38a35
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import gradio as gr
import requests
import pandas as pd

from agent import create_agent, fetch_random_question

# --- Constants ---
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it to build the endpoints it calls.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def _collect_answers(agent, questions):
    """
    Run the agent on every usable question and gather its outputs.

    Entries that are not dicts, or that lack a truthy "task_id" or
    "question", are skipped. An exception raised by ``agent.run`` is
    recorded as an "ERROR: ..." answer so a single failure does not
    abort the whole run.

    Returns a ``(results, answers_payload)`` pair: the rows shown in the
    results table and the entries sent to the submit endpoint.
    """
    results = []
    answers_payload = []
    for q in questions:
        # Guard against malformed entries; calling .get on a non-dict
        # would raise AttributeError and crash the callback.
        if not isinstance(q, dict):
            continue
        tid = q.get("task_id")
        text = q.get("question")
        if not tid or not text:
            continue
        try:
            ans = agent.run(question=text)
        except Exception as e:
            ans = f"ERROR: {e}"
        results.append({
            "Task ID": tid,
            "Question": text,
            "Answer": ans
        })
        answers_payload.append({
            "task_id": tid,
            "submitted_answer": ans
        })
    return results, answers_payload


def run_and_submit_all(profile):
    """
    Fetch all questions, run the agent on them, submit all answers,
    and return the status and results table.

    Args:
        profile: Login profile object. The username is read from its
            ``username`` attribute, falling back to ``name``. A falsy
            value is treated as "not logged in".

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable
        message; ``table`` is a ``pandas.DataFrame`` of per-question
        results, or ``None`` when the run stops before any question is
        processed. Failures are reported through ``status`` rather than
        raised.
    """
    # Check login
    if not profile:
        return "Please login to Hugging Face with the login button.", None

    # Extract username (support both .username and .name)
    username = getattr(profile, "username", None) or getattr(profile, "name", None)
    if not username:
        return "Login error: username not found.", None

    # Build URLs
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Instantiate agent
    try:
        agent = create_agent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Fetch questions
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A valid JSON body is not necessarily a list of questions (it could be
    # an error object); iterating such a payload would fail further down.
    if not isinstance(questions, list):
        return (
            "Error fetching questions: expected a JSON list, "
            f"got {type(questions).__name__}.",
            None,
        )

    # Run agent
    results, answers_payload = _collect_answers(agent, questions)

    if not answers_payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # Submit answers
    payload = {
        "username": username,
        # SPACE_ID comes from the environment; when it is unset the URL
        # contains the literal text "None".
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": answers_payload
    }
    try:
        resp = requests.post(submit_url, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        # Keep the results table visible even when the submission fails.
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(profile):
    """
    Run the agent on a single randomly fetched question.

    Returns a two-item tuple for the question and answer boxes. When no
    profile is supplied the first item is the login prompt; when fetching
    or answering raises, the first item carries the error text. In both
    of those cases the second item is an empty string.
    """
    if profile:
        try:
            sample = fetch_random_question()
            prompt = sample.get("question", "")
            reply = create_agent().run(question=prompt)
        except Exception as err:
            return f"Error during test: {err}", ""
        return prompt, reply

    return "Please login to Hugging Face with the login button.", ""


# --- Gradio Interface ---
# Builds the Blocks app: a heading, usage instructions, a login button,
# two action buttons, and the output widgets those buttons fill in.
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and define your agent logic in `agent.py`.
        2. Log in with your Hugging Face account using the login button below.
        3. Use **Run Evaluation & Submit All Answers** or **Test Random Question**.
        """
    )

    # Login button; it is passed as the only input to both callbacks below.
    login = gr.LoginButton()

    # One button per workflow: full evaluation run vs. single-question test.
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    test_button = gr.Button("Test Random Question")

    # Outputs of run_and_submit_all: status text and the per-question table.
    status_output = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Full Results Table", wrap=True)
    # Outputs of test_random_question: the sampled question and the answer.
    question_box = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_box = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    # Wire the login component directly into the callbacks
    # NOTE(review): Gradio's OAuth guide describes obtaining the profile through
    # a callback parameter annotated with `gr.OAuthProfile`, not through
    # `inputs`. With `inputs=[login]` the callbacks presumably receive the
    # button's value rather than a profile object, in which case
    # run_and_submit_all cannot find `.username`/`.name` - confirm and, if so,
    # change the callback signatures and drop `inputs` together.
    run_button.click(fn=run_and_submit_all, inputs=[login], outputs=[status_output, results_table])
    test_button.click(fn=test_random_question, inputs=[login], outputs=[question_box, answer_box])

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)