File size: 3,403 Bytes
10e9b7d
6a38a35
eccf8e4
6a38a35
bee5328
6a38a35
bee5328
6a38a35
bee5328
6a38a35
c396a92
fd5a08b
c396a92
 
 
 
6a38a35
 
bd7cd5b
 
 
6a38a35
 
bee5328
bd7cd5b
 
 
 
 
c396a92
bd7cd5b
 
6a38a35
 
bd7cd5b
6a38a35
fd5a08b
bd7cd5b
 
 
 
c396a92
41085c3
bd7cd5b
 
c396a92
 
bd7cd5b
c396a92
6a38a35
bd7cd5b
c396a92
 
6a38a35
 
bd7cd5b
 
 
0e6388c
6a38a35
 
 
 
 
 
 
c396a92
fd5a08b
bd7cd5b
 
 
 
 
c396a92
6a38a35
 
 
 
 
ee4d1e5
c396a92
fd5a08b
c396a92
6a38a35
60f0482
6a38a35
 
bd7cd5b
 
fd5a08b
bd7cd5b
 
 
 
6a38a35
bd7cd5b
 
6a38a35
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
from typing import Optional

import gradio as gr
import pandas as pd
import requests

from agent import create_agent, fetch_random_question

# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# paths are appended to it when fetching questions and submitting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def run_and_submit_all(profile):
    """Run the agent on every evaluation question and submit the answers.

    Args:
        profile: Login profile of the current user, or a falsy value when
            nobody is logged in. Only its ``username`` attribute (falling
            back to ``name``) is read.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with one row per question the
        agent was run on, or ``None`` when the run stopped before the agent
        was run on any question (not logged in, fetch failure, agent
        construction failure).
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", None
    username = getattr(profile, "username", None) or getattr(profile, "name", None)
    if not username:
        return "Login error: username not found.", None

    # Fetch questions
    try:
        qs = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        qs.raise_for_status()
        questions = qs.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions, list):
        # Anything but a list would make the loop below misbehave or crash.
        return "Error fetching questions: unexpected response format.", None

    # Instantiate agent; report a failure as a status message like every
    # other error path instead of letting the exception escape the handler.
    try:
        agent = create_agent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Run and collect answers
    results, payload = [], []
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid, text = q.get("task_id"), q.get("question")
        if not (tid and text):
            continue
        try:
            # The question is passed positionally.
            ans = agent.run(text)
        except Exception as e:
            # The agent is arbitrary code: record the failure as this
            # question's answer and keep going with the remaining ones.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": text, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # Submit
    submission = {
        "username": username,
        # os.getenv returns None when SPACE_ID is unset, in which case the
        # link contains the literal text "None".
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": payload,
    }
    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        # Use .get so that a missing field in an otherwise successful
        # response is not misreported as a failed submission.
        status = (
            "Submission Successful!\n"
            f"User: {data.get('username', username)}\n"
            f"Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count', '?')}/{data.get('total_attempted', '?')})\n"
            f"Message: {data.get('message', 'No message received.')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(profile):
    """Fetch one random question and run the agent on it.

    Args:
        profile: Login profile of the current user; only its truthiness is
            checked, to require a login before the agent is run.

    Returns:
        A ``(question, answer)`` pair for the two output text boxes. When
        nobody is logged in or the question cannot be fetched, the first
        element carries the message and the second is an empty string. When
        the agent fails, the answer is an ``ERROR: ...`` string.
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", ""

    try:
        question = fetch_random_question()["question"]
    except Exception as e:
        # Show the failure in the UI instead of raising out of the handler.
        return f"Error fetching question: {e}", ""

    try:
        # The question is passed positionally.
        answer = create_agent().run(question)
    except Exception as e:
        # Same "ERROR: ..." convention as the full evaluation run.
        answer = f"ERROR: {e}"
    return question, answer


# The click handlers are thin wrappers whose parameter is annotated with
# gr.OAuthProfile: Gradio fills such a parameter with the logged-in user's
# profile (None when nobody is logged in). Wiring the login button as an
# event input would pass the button's own value instead of a profile object,
# so the username lookup could never succeed.
def _run_and_submit_all_for_user(profile: Optional[gr.OAuthProfile]):
    """Forward the injected OAuth profile to ``run_and_submit_all``."""
    return run_and_submit_all(profile)


def _test_random_question_for_user(profile: Optional[gr.OAuthProfile]):
    """Forward the injected OAuth profile to ``test_random_question``."""
    return test_random_question(profile)


with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and define your agent logic in `agent.py`.
        2. Log in with your Hugging Face account using the login button below.
        3. Use **Run Evaluation & Submit All Answers** or **Test Random Question**.
        """
    )

    # The login button stays in the layout so users can sign in; it is no
    # longer passed to the handlers as an input.
    login = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Outputs of the full evaluation run.
    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)
    # Outputs of the single-question test.
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    run_btn.click(fn=_run_and_submit_all_for_user, outputs=[status_out, table_out])
    test_btn.click(fn=_test_random_question_for_user, outputs=[question_out, answer_out])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)