Ubik80's picture
Update app.py
94aca96 verified
raw
history blame
4.35 kB
import os
import gradio as gr
import requests
import pandas as pd
from tools import AnswerTool
from smolagents import CodeAgent, OpenAIServerModel
from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool
# Constants
# Base URL of the scoring service. The code below appends the
# /questions, /random-question and /submit endpoints to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` that answers questions.

    The agent uses the ``gpt-4o`` model and is given the custom
    ``AnswerTool`` plus the DuckDuckGo and Wikipedia search tools, with
    ``add_base_tools=True`` enabled on top of those.
    """

    def __init__(self):
        """Build the model, the tools and the underlying ``CodeAgent``."""
        model = OpenAIServerModel(model_id="gpt-4o")
        answer_tool = AnswerTool()
        web_tool = DuckDuckGoSearchTool()
        wiki_tool = WikipediaSearchTool()
        self.agent = CodeAgent(
            model=model,
            tools=[answer_tool, web_tool, wiki_tool],
            add_base_tools=True,
            # NOTE(review): presumably caps each run at two agent steps -
            # confirm this is enough for multi-hop questions.
            max_steps=2,
            verbosity_level=0,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on *question* and return its answer as text.

        Args:
            question: The question to hand to the agent.

        Returns:
            The agent's answer. A result that is not already a string
            (for example a number) is converted with ``str()`` so the
            declared return type holds and callers can display or
            JSON-encode the answer without type checks.
        """
        result = self.agent.run(question)
        return result if isinstance(result, str) else str(result)
def run_and_submit_all(username):
    """Fetch every question, answer each with the agent and submit the answers.

    Args:
        username: Hugging Face username typed into the UI. Leading and
            trailing whitespace is ignored; a blank value is rejected.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable
        message. ``table`` is a ``pandas.DataFrame`` with one row per
        answered question (columns ``Task ID``, ``Question``, ``Answer``),
        or ``None`` when the run stops before any question is processed.
    """
    # A whitespace-only entry is as unusable as an empty one, and stray
    # whitespace must not end up in the submitted username.
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # 1. Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", None
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions, list):
        # The loop below expects a list of question objects.
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions).__name__}).",
            None,
        )

    # 2. Run agent on all questions
    try:
        agent = BasicAgent()
    except Exception as e:
        # Report setup failures as a status message instead of letting
        # the exception escape the UI callback.
        return f"Error initializing agent: {e}", None

    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid = q.get("task_id")
        text = q.get("question")
        if not (tid and text):
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # Keep going: a failure on one question is recorded as its answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": text, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # 3. Submit answers
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"
    return status, pd.DataFrame(results)
def test_random_question(username):
    """Fetch one random question and run the agent on it.

    Args:
        username: Hugging Face username typed into the UI. It is only
            checked for being non-blank; it is not sent anywhere here.

    Returns:
        A ``(question, answer)`` tuple for the two output text boxes. On
        any failure the first element carries the error message and the
        second is an empty string.
    """
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors (rate limiting, server faults) instead of
        # parsing an error body as if it were a question.
        resp.raise_for_status()
        question = resp.json().get("question", "")
        if not question:
            # Running the agent on an empty prompt would only waste a model call.
            return "Error during test: no question returned by the server.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""
# Build Gradio interface
with gr.Blocks() as demo:
    # Header and usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        "\n"
        "**Instructions:**\n"
        "1. Enter your Hugging Face username.\n"
        "2. Use **Test Random Question** to check a single question.\n"
        "3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.\n"
    )

    # Input field and the two action buttons.
    username_input = gr.Textbox(
        label="Hugging Face Username",
        placeholder="your-username",
    )
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Outputs of the full evaluation run.
    status_out = gr.Textbox(label="Status / Result", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)

    # Outputs of the single-question test.
    question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
    answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)

    # Wire each button to its callback; both read the username field.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[username_input],
        outputs=[status_out, table_out],
    )
    test_btn.click(
        fn=test_random_question,
        inputs=[username_input],
        outputs=[question_out, answer_out],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)