File size: 4,430 Bytes
38c5e1d
10e9b7d
6a38a35
eccf8e4
6a38a35
bee5328
0e6b913
b7a3c71
38c5e1d
bee5328
e0cc1b7
6a38a35
bee5328
e0cc1b7
 
38c5e1d
9f4a3ee
9307ac3
38c5e1d
e0cc1b7
b7a3c71
38c5e1d
 
 
0e6b913
e0cc1b7
 
 
38c5e1d
e0cc1b7
 
de8170e
9307ac3
38c5e1d
c396a92
9307ac3
c396a92
b7a3c71
6a38a35
9307ac3
d1c8ce2
 
e0cc1b7
 
b7a3c71
6a38a35
bee5328
9307ac3
e0cc1b7
9307ac3
 
c396a92
9307ac3
 
 
de8170e
6a38a35
e0cc1b7
6a38a35
fd5a08b
9307ac3
 
bd7cd5b
 
c396a92
41085c3
9307ac3
bd7cd5b
9307ac3
 
 
c396a92
6a38a35
9307ac3
e0cc1b7
 
6a38a35
 
de8170e
 
 
0e6388c
6a38a35
 
 
 
 
de8170e
9307ac3
 
 
e0cc1b7
 
9307ac3
0e6b913
 
e0cc1b7
 
6a38a35
38c5e1d
9307ac3
6a38a35
e0cc1b7
de8170e
 
 
9307ac3
 
 
de8170e
 
21325a3
9307ac3
bd7cd5b
 
21325a3
38c5e1d
 
bd7cd5b
38c5e1d
6a38a35
38c5e1d
9307ac3
6a38a35
 
38c5e1d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# app.py
import os
import gradio as gr
import requests
import pandas as pd

from tools import AnswerTool
from smolagents import CodeAgent, OpenAIServerModel
from smolagents import DuckDuckGoSearchTool   # ← importiamo il search tool

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

class BasicAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The wrapped agent uses the ``gpt-4o`` model and is given exactly two
    tools: ``AnswerTool`` and ``DuckDuckGoSearchTool``.
    """

    def __init__(self):
        """Create the model, the two tools, and the ``CodeAgent`` that uses them."""
        model = OpenAIServerModel(model_id="gpt-4o")
        answer_tool = AnswerTool()
        web_tool = DuckDuckGoSearchTool()
        self.agent = CodeAgent(
            model=model,
            tools=[answer_tool, web_tool],  # the only two tools exposed
            add_base_tools=False,           # no additional built-in tools
            max_steps=2,                    # at most two agent steps
            verbosity_level=0,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question text handed to the agent.

        Returns:
            The agent's answer. String results are returned unchanged, a
            missing result (``None``) becomes ``""``, and any other value is
            converted with ``str()`` so callers always receive text.
        """
        # Author's intent: try AnswerTool first, then DuckDuckGo if needed.
        # That ordering is left to the agent; nothing here enforces it.
        result = self.agent.run(question)
        if result is None:
            return ""
        if isinstance(result, str):
            return result
        # The run may yield non-string objects (numbers, lists, ...); normalise
        # them so the declared ``-> str`` contract actually holds.
        return str(result)


def run_and_submit_all(username):
    """Fetch every question, answer each with ``BasicAgent``, and submit the answers.

    Args:
        username: Hugging Face username entered by the user. Surrounding
            whitespace is ignored; an empty or whitespace-only value is
            rejected.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message;
        ``table`` is a ``pandas.DataFrame`` with one row per answered question,
        or ``None`` when the run stops before any question is processed.
    """
    # Normalise first so a missing or whitespace-only username is rejected
    # instead of being sent to the scoring server.
    username = (username or "").strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", None
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below needs a list of question objects; anything else would
    # crash it, so report that as a fetch error.
    if not isinstance(questions, list):
        return "Error fetching questions: unexpected response format.", None

    # Constructing the agent may raise; report it as a status message like the
    # other failure paths rather than letting the exception escape to the UI.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Run agent on all questions
    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid = q.get('task_id')
        text = q.get('question')
        # Skip entries missing either the task id or the question text.
        if not (tid and text):
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # Keep going: one failing question is recorded as an error answer.
            ans = f"ERROR: {e}"
        results.append({'Task ID': tid, 'Question': text, 'Answer': ans})
        payload.append({'task_id': tid, 'submitted_answer': ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # Submit answers
    submission = {
        'username': username,
        'agent_code': f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        'answers': payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(username):
    """Fetch one random question from the scoring server and run the agent on it.

    Args:
        username: Hugging Face username entered by the user. It is only
            checked for presence here; it is not sent anywhere.

    Returns:
        ``(question, answer)`` on success, or ``(message, "")`` when the
        username is missing or any step fails.
    """
    # Reject missing and whitespace-only usernames alike.
    if not (username or "").strip():
        return "Please enter your Hugging Face username.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors (e.g. rate limiting) instead of treating an
        # error body as if it were a question.
        resp.raise_for_status()
        q = resp.json()
        question = q.get('question', '')
        if not question:
            # Nothing to ask: do not spend an agent run on an empty prompt.
            return "Error during test: no question returned by the server.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""


# --- Gradio UI ---
with gr.Blocks() as demo:
    # Page title followed by the usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Enter your Hugging Face username.
        2. Use **Test Random Question** to check a single question.
        3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
        """
    )

    # Input field and the two action buttons.
    username_input = gr.Textbox(
        label="Hugging Face Username",
        placeholder="your-username",
    )
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Read-only output widgets: run status, full results table, and the
    # question/answer pair from a single test run.
    status_out = gr.Textbox(
        label="Status / Result",
        lines=5,
        interactive=False,
    )
    table_out = gr.DataFrame(
        label="Full Results Table",
        wrap=True,
    )
    question_out = gr.Textbox(
        label="Random Question",
        lines=3,
        interactive=False,
    )
    answer_out = gr.Textbox(
        label="Agent Answer",
        lines=3,
        interactive=False,
    )

    # Wire each button to its handler; both take the username as sole input.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[username_input],
        outputs=[status_out, table_out],
    )
    test_btn.click(
        fn=test_random_question,
        inputs=[username_input],
        outputs=[question_out, answer_out],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=False, debug=True)