Spaces:
Sleeping
Sleeping
File size: 3,827 Bytes
10e9b7d 6a38a35 eccf8e4 6a38a35 bee5328 e0cc1b7 de8170e bee5328 e0cc1b7 6a38a35 bee5328 e0cc1b7 03a058f 21325a3 e0cc1b7 03a058f e0cc1b7 de8170e 03a058f c396a92 03a058f c396a92 de8170e 6a38a35 e0cc1b7 6a38a35 bee5328 21325a3 e0cc1b7 de8170e c396a92 de8170e 03a058f de8170e 6a38a35 e0cc1b7 6a38a35 fd5a08b de8170e bd7cd5b c396a92 41085c3 21325a3 bd7cd5b de8170e c396a92 6a38a35 e0cc1b7 6a38a35 de8170e 0e6388c 6a38a35 de8170e 03a058f e0cc1b7 de8170e e0cc1b7 6a38a35 e0cc1b7 6a38a35 e0cc1b7 de8170e 03a058f de8170e 21325a3 03a058f bd7cd5b 21325a3 bd7cd5b 6a38a35 03a058f 6a38a35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import os
import gradio as gr
import requests
import pandas as pd
from tools import FinalAnswerTool
from smolagents import CodeAgent, OpenAIServerModel
# --- Constants ---
# Base URL of the scoring service. The functions in this file append the
# "/questions", "/submit" and "/random-question" paths to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent``.

    The wrapped agent is built from a GPT-4o ``OpenAIServerModel`` and a
    single ``FinalAnswerTool``, and is limited to three steps per run.
    """

    def __init__(self):
        # Use GPT-4o; ensure your OpenAI API key has access
        model = OpenAIServerModel(model_id="gpt-4o")
        final_tool = FinalAnswerTool()
        self.agent = CodeAgent(
            model=model,
            tools=[final_tool],
            max_steps=3,
            verbosity_level=1,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as text.

        Args:
            question: The question text handed to the agent.

        Returns:
            The agent's answer. Callers put this value into a results table
            and a JSON payload, so anything that is not already a string is
            converted with ``str()`` to honour the declared return type.
        """
        # Positional call
        answer = self.agent.run(question)
        # NOTE(review): the agent's final answer is presumably an arbitrary
        # Python object (number, list, ...) -- confirm against smolagents.
        if isinstance(answer, str):
            return answer
        return str(answer)
def run_and_submit_all(username):
    """Answer every question from the scoring API and submit the answers.

    Args:
        username: Hugging Face username entered by the user. Leading and
            trailing whitespace is ignored.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable
        message. ``results`` is a ``pandas.DataFrame`` with one row per
        attempted question (columns ``Task ID``, ``Question``, ``Answer``),
        or ``None`` when the run stopped before any question was attempted.
        Fetch, agent-construction and submission failures are reported
        through ``status`` instead of being raised.
    """
    # Username provided manually by the user. Strip it so that a
    # whitespace-only entry is rejected instead of being submitted.
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions, list):
        # The loop below expects a list of question objects.
        return "Error fetching questions: unexpected response format.", None

    # Build the agent under a guard so a construction failure is reported
    # as a status message like the other failures in this function.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Run agent
    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            # Malformed entry: there is nothing to look up on it.
            continue
        tid = q.get('task_id')
        text = q.get('question')
        if not (tid and text):
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # A failure on one question is recorded as that question's
            # answer so the remaining questions are still attempted.
            ans = f"ERROR: {e}"
        results.append({'Task ID': tid, 'Question': text, 'Answer': ans})
        payload.append({'task_id': tid, 'submitted_answer': ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # Submit
    submission = {
        'username': username,
        # When the SPACE_ID environment variable is unset, os.getenv returns
        # None and the link contains the literal text "None".
        'agent_code': f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        'answers': payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"
    return status, pd.DataFrame(results)
def test_random_question():
    """Fetch one random question from the scoring API and answer it.

    Returns:
        A ``(question, answer)`` tuple. On any failure (network error, HTTP
        error status, response without question text, agent error) the
        first element is a message starting with ``"Error during test: "``
        and the second element is an empty string.
    """
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP error statuses instead of parsing an error body as
        # if it were a question.
        resp.raise_for_status()
        question = resp.json().get('question', '')
        if not question:
            # Do not build and run the agent when there is nothing to ask.
            return "Error during test: no question returned by the API.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""
# --- Gradio UI ---
with gr.Blocks() as demo:
    # Page heading followed by the usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Enter your Hugging Face username.
2. Use **Test Random Question** to check a single question.
3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
"""
    )

    # Input field and the two action buttons.
    username_input = gr.Textbox(
        label="Hugging Face Username",
        placeholder="your-username",
    )
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Outputs of the full evaluation run.
    status_out = gr.Textbox(
        label="Status / Result",
        lines=5,
        interactive=False,
    )
    table_out = gr.DataFrame(label="Full Results Table", wrap=True)

    # Outputs of the single-question test.
    question_out = gr.Textbox(
        label="Random Question",
        lines=3,
        interactive=False,
    )
    answer_out = gr.Textbox(
        label="Agent Answer",
        lines=3,
        interactive=False,
    )

    # Wire each button to its handler.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[username_input],
        outputs=[status_out, table_out],
    )
    test_btn.click(
        fn=test_random_question,
        inputs=[],
        outputs=[question_out, answer_out],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)
|