File size: 3,992 Bytes
10e9b7d
6a38a35
eccf8e4
6a38a35
bee5328
e0cc1b7
de8170e
bee5328
e0cc1b7
6a38a35
bee5328
e0cc1b7
 
03a058f
21325a3
e0cc1b7
 
 
 
 
 
 
 
 
03a058f
e0cc1b7
 
de8170e
03a058f
 
c396a92
03a058f
c396a92
de8170e
6a38a35
e0cc1b7
 
 
6a38a35
 
bee5328
21325a3
e0cc1b7
de8170e
 
c396a92
de8170e
 
03a058f
de8170e
6a38a35
e0cc1b7
6a38a35
fd5a08b
de8170e
 
bd7cd5b
 
c396a92
41085c3
21325a3
bd7cd5b
de8170e
 
 
c396a92
6a38a35
e0cc1b7
 
 
6a38a35
 
de8170e
 
 
0e6388c
6a38a35
 
 
 
 
de8170e
76f29b6
 
 
 
e0cc1b7
 
de8170e
 
e0cc1b7
 
6a38a35
e0cc1b7
6a38a35
e0cc1b7
de8170e
 
 
76f29b6
03a058f
 
de8170e
 
21325a3
03a058f
bd7cd5b
 
21325a3
bd7cd5b
 
 
 
6a38a35
76f29b6
 
6a38a35
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import gradio as gr
import requests
import pandas as pd

from tools import FinalAnswerTool
from smolagents import CodeAgent, OpenAIServerModel

# --- Constants ---
# Base URL that the request handlers in this file append `/questions`,
# `/submit` and `/random-question` to.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class BasicAgent:
    """Callable wrapper that forwards a question to a ``CodeAgent``.

    The wrapped agent is configured with an ``OpenAIServerModel`` using the
    ``gpt-4o`` model id, a single ``FinalAnswerTool``, at most 3 steps and
    verbosity level 1.
    """

    def __init__(self):
        # GPT-4o backend: the OpenAI API key in use must have access to it.
        self.agent = CodeAgent(
            model=OpenAIServerModel(model_id="gpt-4o"),
            tools=[FinalAnswerTool()],
            max_steps=3,
            verbosity_level=1,
        )

    def __call__(self, question: str) -> str:
        """Run the wrapped agent on *question* and return whatever it produces."""
        # The question is handed over as the sole positional argument.
        answer = self.agent.run(question)
        return answer


def run_and_submit_all(username):
    """Run the agent on every fetched question and submit the answers.

    Args:
        username: Hugging Face username typed by the user. Surrounding
            whitespace is ignored; an empty or whitespace-only value is
            rejected before any request is made.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a ``pandas.DataFrame`` with the columns ``Task ID``,
        ``Question`` and ``Answer``, or ``None`` when the run stopped before
        the agent was asked anything (missing username, fetch failure,
        unexpected payload, agent construction failure).
    """
    # Username provided manually by the user; treat "   " like an empty box.
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions, list):
        # The loop below expects a list of dicts; anything else is reported
        # instead of failing later with an AttributeError.
        return "Error fetching questions: unexpected response format.", None

    # Build the agent once; report construction problems as a status message
    # rather than letting the exception escape the UI callback.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Run agent
    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid = q.get('task_id')
        text = q.get('question')
        if not (tid and text):
            # Entries without both an id and a question text are skipped.
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # A failure on one question is recorded as its answer so the
            # remaining questions are still attempted.
            ans = f"ERROR: {e}"
        results.append({'Task ID': tid, 'Question': text, 'Answer': ans})
        payload.append({'task_id': tid, 'submitted_answer': ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # Submit
    # NOTE(review): when SPACE_ID is not set, os.getenv returns None and the
    # URL below contains the literal text "None" — confirm this is acceptable.
    submission = {
        'username': username,
        'agent_code': f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        'answers': payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(username):
    """Fetch one random question and run a fresh agent on it.

    Args:
        username: Hugging Face username typed by the user. It is only checked
            for presence here (surrounding whitespace ignored); it is not
            included in the request.

    Returns:
        ``(question, answer)`` on success. On any failure the first element
        carries the message to display and the second is an empty string.
    """
    # Use provided username for auth consistency
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors instead of parsing an error body as a question.
        resp.raise_for_status()
        question = resp.json().get('question', '')
        if not question:
            # Do not spend an agent run on an empty prompt.
            return "Error during test: no question returned by the API.", ""
        return question, BasicAgent()(question)
    except Exception as e:
        return f"Error during test: {e}", ""

# --- Gradio UI ---
# Builds the page: a title, usage instructions, one username box, two action
# buttons and four output widgets, then wires each button to its handler.
# NOTE(review): presumably the widgets appear on the page in the order they
# are created here — confirm before reordering any statement.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Enter your Hugging Face username in the box.
        2. Use **Test Random Question** to check a single question.
        3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
        """
    )

    # Input shared by both buttons.
    username_input = gr.Textbox(label="Hugging Face Username", placeholder="your-username")
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Outputs of the full run (status_out, table_out) and of the single-question
    # test (question_out, answer_out).
    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    # Each handler returns a 2-tuple that is mapped onto its two outputs in order.
    run_btn.click(fn=run_and_submit_all, inputs=[username_input], outputs=[status_out, table_out])
    test_btn.click(fn=test_random_question, inputs=[username_input], outputs=[question_out, answer_out])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)