File size: 6,115 Bytes
211cd46
10e9b7d
6a38a35
eccf8e4
6a38a35
9b3df85
 
bee5328
9b3df85
b7a3c71
94aca96
bee5328
211cd46
6a38a35
bee5328
9b3df85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0cc1b7
 
9b3df85
 
211cd46
9b3df85
 
 
 
 
e0cc1b7
b7a3c71
9b3df85
 
 
 
 
 
e0cc1b7
 
9b3df85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0cc1b7
211cd46
9307ac3
c396a92
9307ac3
c396a92
94aca96
6a38a35
9307ac3
d1c8ce2
 
e0cc1b7
 
b7a3c71
6a38a35
bee5328
9b3df85
 
9307ac3
 
9b3df85
 
c396a92
9b3df85
94aca96
9307ac3
de8170e
6a38a35
9b3df85
6a38a35
fd5a08b
94aca96
 
bd7cd5b
 
c396a92
41085c3
9b3df85
bd7cd5b
9b3df85
 
 
c396a92
6a38a35
9307ac3
e0cc1b7
 
6a38a35
 
de8170e
 
 
0e6388c
6a38a35
 
 
 
 
211cd46
9307ac3
 
 
e0cc1b7
 
94aca96
9b3df85
0e6b913
e0cc1b7
 
6a38a35
211cd46
6a38a35
e0cc1b7
de8170e
 
 
9307ac3
 
 
de8170e
 
21325a3
9307ac3
9b3df85
 
21325a3
9b3df85
 
bd7cd5b
9b3df85
6a38a35
9b3df85
9307ac3
6a38a35
 
38c5e1d
211cd46
9b3df85
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# app.py
import os
import gradio as gr
import requests
import pandas as pd
import tempfile
from pathlib import Path

from tools import AnswerTool, SpeechToTextTool, ExcelToTextTool
from smolagents import CodeAgent, OpenAIServerModel
from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool

# --- Constants ---
# Base URL of the scoring service. The code in this file appends the
# /questions, /random-question, /files/{task_id} and /submit paths to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
    """
    Download the file attached to a task, if the API has one.

    Issues GET {base_api_url}/files/{task_id}.
    • On HTTP 200 → save the body under <tempdir>/gaia_files and return the local path.
    • On 404 → return None.
    • On other HTTP error statuses → raise requests.exceptions.HTTPError so the
      caller can log / handle it. Other requests failures (connection errors,
      timeouts) propagate unchanged as well.

    The file is named after the Content-Disposition ``filename`` when the
    response carries one, otherwise after ``task_id``. Only the last path
    component of that name is kept, so a server-supplied name such as
    ``../../x`` or ``/etc/x`` cannot be written outside the download directory.
    """
    import re  # local import: `re` is not imported at module level in this file

    url = f"{base_api_url}/files/{task_id}"
    resp = requests.get(url, timeout=30)
    if resp.status_code == 404:
        return None
    resp.raise_for_status()

    # Determine filename from headers (quoted or bare value) or default to task_id
    cd = resp.headers.get("content-disposition", "")
    filename = task_id
    m = re.search(r'filename="([^"]+)"', cd) or re.search(r'filename=([^;\s"]+)', cd)
    if m:
        filename = m.group(1)

    # The header value is untrusted input: drop any directory part (either
    # separator style) and reject names that would resolve to a directory.
    for candidate in (filename, task_id):
        safe_name = Path(str(candidate).replace("\\", "/")).name
        if safe_name not in ("", ".", ".."):
            break
    else:
        safe_name = "download"

    # Save to temp dir
    tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    file_path = tmp_dir / safe_name
    file_path.write_bytes(resp.content)
    return str(file_path)


class BasicAgent:
    """
    Callable wrapper around a smolagents ``CodeAgent``.

    Calling an instance with a task id and a question first tries to download
    the task's file, mentions the local path in the prompt when a file was
    saved, and returns the result of running the agent on that prompt.
    """

    def __init__(self):
        # Initialize CodeAgent with GPT-4o, file/audio/excel, web and wiki tools, plus final answer
        model = OpenAIServerModel(model_id="gpt-4o")
        answer_tool = AnswerTool()
        speech_tool = SpeechToTextTool()
        excel_tool = ExcelToTextTool()
        web_tool = DuckDuckGoSearchTool()
        wiki_tool = WikipediaSearchTool()

        self.agent = CodeAgent(
            model=model,
            tools=[speech_tool, excel_tool, wiki_tool, web_tool, answer_tool],
            add_base_tools=False,
            additional_authorized_imports=["pandas", "openpyxl"],
            max_steps=4,
            planning_interval=1,
            verbosity_level=1
        )

    def __call__(self, task_id: str, question: str) -> str:
        """
        Answer ``question`` for the task identified by ``task_id``.

        Returns whatever ``self.agent.run`` returns for the prompt.
        NOTE(review): the annotation says ``str``; confirm the agent never
        returns a non-string final answer.
        """
        # Pre-fetch any file for this task. This stays best-effort (the
        # question is still attempted without the file), but the failure is
        # reported instead of being silently discarded.
        file_path = None
        try:
            file_path = download_file_if_any(DEFAULT_API_URL, task_id)
        except Exception as e:
            print(f"Warning: could not download file for task {task_id}: {e}")

        # Build prompt including file context if any
        if file_path:
            prompt = f"{question}\n\n---\nA file for this task was downloaded and saved at: {file_path}\n---"
        else:
            prompt = question

        return self.agent.run(prompt)


def run_and_submit_all(username):
    """
    Run the agent on every question from the API and submit the answers.

    Args:
        username: Hugging Face username entered in the UI. Surrounding
            whitespace is ignored; an empty or whitespace-only value is
            rejected before any request is made.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message
        (submission summary or error text). ``table`` is a DataFrame with the
        columns "Task ID", "Question" and "Answer", or None when the run
        stopped before any question was processed.
    """
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # 1. Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", None
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below expects a list of dicts; report anything else as a fetch
    # error instead of failing with an AttributeError part-way through.
    if not isinstance(questions, list):
        return f"Error fetching questions: unexpected response type {type(questions).__name__}", None

    # 2. Instantiate agent (a construction failure is reported as a status
    #    message, like every other failure in this function)
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None
    results = []
    payload = []

    # 3. Run agent on all questions
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid = q.get("task_id")
        text = q.get("question")
        if not (tid and text):
            continue
        try:
            ans = agent(tid, text)
        except Exception as e:
            # One failing question must not abort the whole run; the error
            # text is recorded (and submitted) as that question's answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": text, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # 4. Submit answers
    # NOTE: when SPACE_ID is not set, os.getenv returns None and the link
    # below contains the literal text "None".
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": payload
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)


def test_random_question(username):
    """
    Fetch one random question from the API and run a fresh agent on it.

    Args:
        username: Hugging Face username from the UI; only checked for being
            non-empty.

    Returns:
        A ``(question, answer)`` tuple of strings for the two output boxes.
        On any failure the first element carries the error message and the
        second is an empty string.
    """
    if not username:
        return "Please enter your Hugging Face username.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors (rate limiting, server errors) instead of
        # parsing an error body as if it were a question.
        resp.raise_for_status()
        q = resp.json()
        task_id = q.get("task_id")
        question = q.get("question", "")
        if not (task_id and question):
            # Don't spend an agent run on an incomplete payload.
            return "Error during test: response is missing 'task_id' or 'question'.", ""
        ans = BasicAgent()(task_id, question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""

# --- Gradio UI ---
with gr.Blocks() as demo:
    # Page title followed by the usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Enter your Hugging Face username.
        2. Use **Test Random Question** to check a single question.
        3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
        """
    )

    # Input field and the two action buttons.
    username_input = gr.Textbox(
        placeholder="your-username",
        label="Hugging Face Username",
    )
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Read-only outputs: status text and results table for the full run,
    # then question and answer boxes for the single-question test.
    status_out = gr.Textbox(
        interactive=False,
        lines=5,
        label="Status / Result",
    )
    table_out = gr.DataFrame(
        wrap=True,
        label="Full Results Table",
    )
    question_out = gr.Textbox(
        interactive=False,
        lines=3,
        label="Random Question",
    )
    answer_out = gr.Textbox(
        interactive=False,
        lines=3,
        label="Agent Answer",
    )

    # Both buttons read the username and fill their own pair of outputs.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[username_input],
        outputs=[status_out, table_out],
    )
    test_btn.click(
        fn=test_random_question,
        inputs=[username_input],
        outputs=[question_out, answer_out],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch(share=False, debug=True)