File size: 3,517 Bytes
bbf17d2
 
 
 
5d6fe2e
bbf17d2
 
 
 
273ef8a
72f623a
 
 
2e72f39
 
7f9795b
 
 
 
6b9c0e4
 
273ef8a
 
 
 
 
7f9795b
6b9c0e4
273ef8a
 
7f9795b
273ef8a
 
bbf17d2
273ef8a
7f9795b
bbf17d2
 
273ef8a
bbf17d2
273ef8a
bbf17d2
7f9795b
 
bbf17d2
273ef8a
 
 
7f9795b
bbf17d2
 
7f9795b
 
 
bbf17d2
7f9795b
bbf17d2
7f9795b
273ef8a
bbf17d2
273ef8a
bbf17d2
273ef8a
7f9795b
bbf17d2
 
 
273ef8a
 
 
 
 
 
 
 
 
 
bbf17d2
273ef8a
bbf17d2
 
 
273ef8a
 
 
 
7f9795b
273ef8a
 
7f9795b
273ef8a
bbf17d2
 
273ef8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import gradio as gr
import requests
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Constants ---
# Base URL of the course scoring service; provides the /questions and /submit endpoints.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- BasicAgent Class ---
# --- BasicAgent Class ---
class BasicAgent:
    """Minimal GPT-2 agent that answers a question in 'FINAL ANSWER: ...' form.

    Loads the model/tokenizer eagerly in ``__init__``; calling the instance
    with a question string returns the formatted answer string.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        # Small enough to run on CPU; used purely for free-form generation.
        self.llm = AutoModelForCausalLM.from_pretrained("gpt2")
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.agent_prompt = (
            "You are a general AI assistant. I will ask you a question. "
            "Finish your answer with the format: FINAL ANSWER: [YOUR FINAL ANSWER]."
        )

    def __call__(self, question: str) -> str:
        """Generate an answer for *question* and return 'FINAL ANSWER: <text>'.

        Bug fix: GPT-2's output echoes the prompt, and the prompt itself
        contains the literal 'FINAL ANSWER:' marker.  Splitting the full
        decoded text on that marker could therefore return the prompt's
        placeholder (plus the question) when the model never emits its own
        marker.  We now decode only the newly generated tokens first.
        """
        prompt = f"{self.agent_prompt}\n\nQuestion: {question}"
        inputs = self.tokenizer(prompt, return_tensors="pt")
        outputs = self.llm.generate(**inputs, max_new_tokens=50)
        # Slice off the echoed prompt: keep only tokens past the input length.
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        generated = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        # If the model repeated the marker, keep only what follows the last one.
        final = generated.split("FINAL ANSWER:")[-1].strip()
        return f"FINAL ANSWER: {final}" if final else "FINAL ANSWER: UNKNOWN"

# --- Run and Submit Function ---
# --- Run and Submit Function ---
def run_and_submit_all(profile):
    """Fetch all questions, answer each with BasicAgent, and submit the results.

    Args:
        profile: The logged-in user's Hugging Face OAuth profile (or None/empty
            when not logged in).

    Returns:
        A ``(status_message, results_dataframe)`` tuple for the Gradio outputs.
        ``results_dataframe`` is ``None`` when we bail out before answering.
    """
    space_id = os.getenv("SPACE_ID", "your-username/your-space")  # fallback for local runs
    if not profile or not getattr(profile, "username", None):
        return "❌ Please log in to Hugging Face first.", None

    username = profile.username
    agent = BasicAgent()

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        # Fail fast on HTTP errors instead of handing an error page to .json().
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    # Guard against an unexpected payload shape before iterating dicts below.
    if not isinstance(questions, list):
        return "❌ Unexpected response format from the questions endpoint.", None

    answers = []
    log = []

    for q in questions:
        task_id = q.get("task_id")
        question = q.get("question")
        if not task_id or not question:
            continue  # skip malformed entries rather than submitting blanks
        try:
            answer = agent(question)
            answers.append({"task_id": task_id, "submitted_answer": answer})
            log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            # Record per-question failures but keep answering the rest.
            log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"ERROR: {e}"})

    if not answers:
        return "⚠️ No answers were generated.", pd.DataFrame(log)

    submission = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
        "answers": answers,
    }

    try:
        r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=30)
        r.raise_for_status()
        res = r.json()
        return (
            f"✅ Submission Successful!\n"
            f"User: {res.get('username')}\n"
            f"Score: {res.get('score', 'N/A')}% "
            f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
            f"Message: {res.get('message', '')}"
        ), pd.DataFrame(log)
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(log)

# --- Gradio UI ---
# --- Gradio UI ---
# Component creation order inside gr.Blocks defines the on-page layout,
# so the statements below must stay in this order.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation")
    gr.Markdown("Login with Hugging Face and click the button to run evaluation and submit your answers.")

    # NOTE(review): passing the LoginButton as a click input presumably makes
    # Gradio inject the OAuth profile into run_and_submit_all — confirm this
    # against the Gradio version pinned for this Space.
    profile = gr.LoginButton()
    run_button = gr.Button("Run and Submit")
    status_output = gr.Textbox(label="Submission Status", lines=4)
    results_table = gr.DataFrame(label="Answers Submitted")

    # Wire the button: run_and_submit_all returns (status_text, dataframe).
    run_button.click(fn=run_and_submit_all, inputs=[profile], outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch()