File size: 3,968 Bytes
10e9b7d
6a38a35
eccf8e4
6a38a35
bee5328
6a38a35
bee5328
6a38a35
 
bee5328
 
6a38a35
0e6388c
6a38a35
 
 
 
bee5328
6a38a35
 
 
41085c3
6a38a35
 
 
 
 
bee5328
6a38a35
bee5328
6a38a35
 
 
 
 
 
 
bee5328
6a38a35
 
bee5328
6a38a35
 
 
 
 
 
 
 
 
 
 
 
 
 
41085c3
6a38a35
 
 
 
 
 
 
 
 
 
 
0e6388c
6a38a35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60f0482
6a38a35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
from typing import Optional

import gradio as gr
import pandas as pd
import requests

from agent import create_agent, fetch_random_question

# --- Constants ---
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# (GET) and "/submit" (POST) to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def run_and_submit_all(profile):
    """
    Fetch all questions, run the SmolAgent on them, submit all answers,
    and display the results.

    Args:
        profile: Profile object of the logged-in user. Only its ``username``
            attribute is read. ``None`` (or any value without a non-empty
            ``username``) is treated as "not logged in".

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is ``None`` when the run stops before the question loop
        (not logged in, agent creation failed, questions could not be
        fetched); otherwise it is a ``pandas.DataFrame`` with the columns
        "Task ID", "Question" and "Answer", one row per answered question.
    """
    # Anything that does not carry a username cannot be attributed to a user,
    # so report it as a missing login instead of raising AttributeError.
    username = getattr(profile, "username", None)
    if not username:
        return "Please login to Hugging Face with the button.", None

    # Instantiate the agent
    try:
        agent = create_agent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # NOTE: when SPACE_ID is not set in the environment, the link below
    # contains the literal text "None".
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if not questions:
        return "No questions fetched.", None
    if not isinstance(questions, list):
        # e.g. a JSON object carrying an error message instead of a list
        return "Error fetching questions: unexpected response format.", None

    # Run agent on each question
    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        if not task_id or not question_text:
            continue
        try:
            answer = agent.run(question=question_text)
        except Exception as e:
            # Keep going: one failing question must not abort the whole run.
            answer = f"ERROR: {e}"
        # The agent's return type is not visible here; anything that is not a
        # plain string/number is stringified so the JSON submission cannot
        # fail on an unserialisable object.
        if not isinstance(answer, (str, int, float)):
            answer = str(answer)
        results.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
        payload.append({"task_id": task_id, "submitted_answer": answer})

    # Nothing usable was answered: do not POST an empty submission.
    if not payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results)

    # Submit answers
    submit_payload = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submit_payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            "Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(profile):
    """
    Ask a freshly created agent one randomly fetched question.

    Returns a ``(question, answer)`` pair. When ``profile`` is ``None`` the
    pair is the login prompt and an empty string. When fetching the question,
    creating the agent or running it raises, the pair is ``"Error: <exc>"``
    and an empty string.
    """
    if profile is None:
        return "Please login to test.", ""

    try:
        picked = fetch_random_question()
        runner = create_agent()
        reply = runner.run(question=picked.get("question", ""))
        prompt = picked.get("question", "")
    except Exception as exc:
        return f"Error: {exc}", ""

    return prompt, reply


# --- Gradio Interface ---
# Gradio injects the logged-in user's profile into event handlers through a
# parameter annotated with gr.OAuthProfile (None when nobody is logged in).
# A LoginButton used as an event *input* only yields the button's own value,
# not a profile, so the handlers below take the injected profile and hand it
# to the entry points.
def _run_all_for_current_user(profile: Optional[gr.OAuthProfile]):
    """Run the full evaluation for the profile Gradio injects."""
    return run_and_submit_all(profile)


def _test_random_for_current_user(profile: Optional[gr.OAuthProfile]):
    """Run the single-question test for the profile Gradio injects."""
    return test_random_question(profile)


with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent Evaluation Runner")
    gr.Markdown(
        """
        **Istruzioni:**
        1. Clone questo space e definisci la logica in agent.py.
        2. Effettua il login con il tuo account Hugging Face.
        3. Usa 'Run Evaluation & Submit All Answers' o 'Test Random Question'.
        """
    )

    # OAuth login; the button handles the sign-in flow on its own.
    login = gr.LoginButton()

    run_all = gr.Button("Run Evaluation & Submit All Answers")
    test = gr.Button("Test Random Question")

    status = gr.Textbox(label="Status / Risultato", lines=5, interactive=False)
    table = gr.DataFrame(label="Risultati Completi", wrap=True)
    qbox = gr.Textbox(label="Domanda Casuale", lines=3, interactive=False)
    abox = gr.Textbox(label="Risposta Agente", lines=3, interactive=False)

    # No explicit inputs: the profile argument is supplied by Gradio.
    run_all.click(fn=_run_all_for_current_user, outputs=[status, table])
    test.click(fn=_test_random_for_current_user, outputs=[qbox, abox])

if __name__ == "__main__":
    # Launch exactly once; a second launch() call after the first returns
    # would start the app again.
    demo.launch(debug=True, share=False)