File size: 5,026 Bytes
10e9b7d
 
eccf8e4
3c4371f
10e9b7d
df283f3
b76945c
e80aab9
3db6293
e80aab9
b76945c
 
31243f4
df283f3
 
31243f4
7d65c66
df283f3
3c4371f
7e4a06b
df283f3
3c4371f
7e4a06b
3c4371f
df283f3
3c4371f
7e4a06b
31243f4
 
e80aab9
df283f3
31243f4
b76945c
 
31243f4
3c4371f
31243f4
b76945c
df283f3
36ed51a
c1fd3d2
3c4371f
df283f3
31243f4
eccf8e4
31243f4
7d65c66
31243f4
 
b76945c
 
31243f4
df283f3
31243f4
 
e80aab9
df283f3
7d65c66
 
3c4371f
31243f4
 
 
 
df283f3
31243f4
 
df283f3
 
 
31243f4
df283f3
 
31243f4
 
df283f3
e80aab9
df283f3
 
 
e80aab9
df283f3
 
 
 
e80aab9
df283f3
 
 
e80aab9
df283f3
7d65c66
df283f3
 
e80aab9
 
df283f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80aab9
b76945c
0ee0419
e514fd7
 
df283f3
 
 
e514fd7
e80aab9
 
df283f3
 
 
e80aab9
df283f3
 
 
 
e80aab9
df283f3
 
e80aab9
 
b76945c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import gradio as gr
import requests
import pandas as pd

from agent import create_agent, fetch_random_question

# --- Constants ---
# Base URL of the scoring service. run_and_submit_all appends "/questions"
# and "/submit" to this value to build the endpoints it talks to.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def _run_agent_on_questions(agent, questions_data):
    """Run *agent* on every well-formed question and collect the outcomes.

    Args:
        agent: Object exposing ``run(question=...)``.
        questions_data: Iterable of question entries; each usable entry is a
            dict carrying ``"task_id"`` and ``"question"``.

    Returns:
        An ``(answers_payload, results_log)`` pair. ``answers_payload`` holds
        one ``{"task_id", "submitted_answer"}`` dict per question the agent
        answered. ``results_log`` holds one display row per attempted
        question, including failures (their answer column is ``ERROR: ...``).
    """
    answers_payload = []
    results_log = []
    for item in questions_data:
        # A malformed entry (non-dict, or missing fields) must not abort the
        # whole run, so it is reported and skipped.
        if not isinstance(item, dict):
            print(f"Skipping invalid item: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping invalid item: {item}")
            continue
        try:
            answer = agent.run(question=question_text)
        except Exception as e:
            # Failed questions are shown in the table but never submitted.
            print(f"Error on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
    return answers_payload, results_log


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the SmolAgent on them, submit all answers,
    and display the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message
        (success summary or error description). ``results`` is a pandas
        DataFrame with one row per attempted question, or None when the run
        stopped before any question was attempted.
    """
    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please login to Hugging Face with the button.", None

    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # Without SPACE_ID the code link below cannot point at a real repo.
        print("Warning: SPACE_ID is not set; the agent code link will be invalid.")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate SmolAgent
    try:
        agent = create_agent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    print("SmolAgent initialized.")

    # Code link for verification
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch all questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    # The payload must be a non-empty list; anything else (e.g. an error
    # object) cannot be iterated as questions.
    if not isinstance(questions_data, list) or not questions_data:
        print("Fetched questions list is empty or invalid format.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # 3. Run agent on each question
    print(f"Running agent on {len(questions_data)} questions...")
    answers_payload, results_log = _run_agent_on_questions(agent, questions_data)

    if not answers_payload:
        return "Agent produced no answers.", pd.DataFrame(results_log)

    # 4. Submit answers
    payload = {"username": username, "agent_code": agent_code, "answers": answers_payload}
    print(f"Submitting {len(answers_payload)} answers...")
    try:
        resp = requests.post(submit_url, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        # The results table is still returned so the user can inspect answers.
        print(f"Submission error: {e}")
        return f"Submission Failed: {e}", pd.DataFrame(results_log)


def test_random_question(profile: gr.OAuthProfile | None):
    """
    Pull one random GAIA question and run a freshly created agent on it.

    Returns a ``(question, answer)`` pair of strings. When nobody is logged
    in, the first element is a login prompt and the second is empty. When
    anything fails, the first element is ``Error: ...`` and the second is
    empty.
    """
    if profile:
        try:
            sample = fetch_random_question()
            runner = create_agent()
            reply = runner.run(question=sample.get('question', ''))
            shown_question = sample.get('question', '')
        except Exception as e:
            print(f"Test error: {e}")
            return f"Error: {e}", ""
        return shown_question, reply

    return "Please login to test.", ""


# --- Build Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and define your agent logic in agent.py.
        2. Log in with your Hugging Face account.
        3. Use 'Run Evaluation & Submit All Answers' or 'Test Random Question'.
        """
    )

    login = gr.LoginButton()
    run_all_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Output widgets: status + table for the full run, question + answer for
    # the single-question test.
    status_box = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Full Results Table", wrap=True)
    question_box = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_box = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    # Both handlers take a single `gr.OAuthProfile | None` parameter, which
    # Gradio injects from the type hint. The login button must therefore NOT
    # be listed as an input: doing so would pass an extra positional value
    # on top of the injected profile.
    run_all_btn.click(fn=run_and_submit_all, outputs=[status_box, results_table])
    test_btn.click(fn=test_random_question, outputs=[question_box, answer_box])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)