|
import os |
|
import requests |
|
from smolagents import CodeAgent, tool, OpenAIServerModel |
|
|
|
|
|
|
|
|
|
# Base URL of the scoring service; the tool functions in this file append
# their endpoint path ("/questions", "/random-question", "/submit") to it.
API_URL: str = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
|
|
@tool
def fetch_questions() -> list:
    """
    Fetch the full list of GAIA evaluation questions.

    Returns:
        list: A list of question dicts, each with 'task_id' and 'question'.
    """
    # NOTE(review): the docstring wording is kept as-is on purpose; the @tool
    # decorator presumably turns it into the tool description shown to the
    # model, so rewording it would change runtime behavior. Confirm before
    # editing.
    endpoint = f"{API_URL}/questions"
    response = requests.get(endpoint, timeout=15)
    # Raise on a 4xx/5xx status rather than trying to decode an error body.
    response.raise_for_status()
    return response.json()
|
|
|
|
|
@tool
def fetch_random_question() -> dict:
    """
    Fetch a single random GAIA question.

    Returns:
        dict: A dict with keys 'task_id' and 'question'.
    """
    # NOTE(review): the docstring wording is kept as-is on purpose; the @tool
    # decorator presumably turns it into the tool description shown to the
    # model, so rewording it would change runtime behavior. Confirm before
    # editing.
    endpoint = f"{API_URL}/random-question"
    response = requests.get(endpoint, timeout=15)
    # Raise on a 4xx/5xx status rather than trying to decode an error body.
    response.raise_for_status()
    return response.json()
|
|
|
|
|
@tool
def submit_answers(username: str, agent_code: str, answers: list) -> dict:
    """
    Submit the agent's answers to GAIA and get the scoring.

    Args:
        username (str): The Hugging Face username identifying the submission.
        agent_code (str): URL to your Space code repository for verification.
        answers (list): A list of dicts, each with 'task_id' and 'submitted_answer'.

    Returns:
        dict: A dict containing 'score', 'correct_count', 'total_attempted', 'message', etc.
    """
    # NOTE(review): the docstring wording is kept as-is on purpose; the @tool
    # decorator presumably turns it into the tool description and argument
    # help shown to the model. Confirm before editing.
    endpoint = f"{API_URL}/submit"
    # The submission is sent as a JSON body; this call uses a longer timeout
    # (60 s) than the question-fetching tools (15 s).
    response = requests.post(
        endpoint,
        json={
            "username": username,
            "agent_code": agent_code,
            "answers": answers,
        },
        timeout=60,
    )
    # Raise on a 4xx/5xx status rather than trying to decode an error body.
    response.raise_for_status()
    return response.json()
|
|
|
|
|
def create_agent() -> CodeAgent:
    """
    Build and return a CodeAgent backed by the "gpt-3.5-turbo" model.

    The agent is given the three GAIA tools defined in this module
    (fetch_questions, fetch_random_question, submit_answers).

    No API key is passed to the model explicitly. The original notes for this
    function state that OPENAI_API_KEY must be set in the environment;
    presumably the underlying OpenAI client reads it from there (confirm
    against the installed smolagents / openai versions).

    The per-question prompt wrapper is exposed as ``agent.prompt_template``.
    It is a plain ``str.format`` template with a ``{question}`` placeholder and
    is NOT applied automatically; callers format it themselves, e.g.
    ``agent.run(agent.prompt_template.format(question=text))``.

    Returns:
        CodeAgent: An instance of CodeAgent configured with the GAIA tools.
    """
    # Wrapper asking for an exact-match answer. The text is unchanged from the
    # original keyword argument.
    question_template = (
        "Here is a GAIA question:\n"
        "{question}\n"
        "Provide ONLY the exact answer (exact-match), with no extra text."
    )

    model = OpenAIServerModel(model_id="gpt-3.5-turbo")

    # The original passed the template as `prompt_template=...`. As far as the
    # smolagents API reference shows, the constructor has no such keyword (its
    # prompt hook is `prompt_templates`, a mapping of full prompt sections), so
    # that call fails with TypeError before an agent is ever returned.
    agent = CodeAgent(
        tools=[fetch_questions, fetch_random_question, submit_answers],
        model=model,
    )

    # Keep the template reachable for callers without touching the agent's
    # built-in system prompt.
    agent.prompt_template = question_template
    return agent
|
|