Ubik80 committed on
Commit
6a38a35
·
verified ·
1 Parent(s): 0e6388c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -65
app.py CHANGED
@@ -1,81 +1,122 @@
1
  import os
 
2
  import requests
3
- from smolagents import CodeAgent, tool, OpenAIServerModel
4
-
5
- # ------------------------
6
- # Constants
7
- # ------------------------
8
- API_URL = "https://agents-course-unit4-scoring.hf.space"
9
-
10
- # ------------------------
11
- # Tool definitions
12
- # ------------------------
13
- @tool
14
- def fetch_questions() -> list:
15
- """
16
- Fetch the full list of GAIA evaluation questions.
17
 
18
- Returns:
19
- list: A list of question dicts, each with 'task_id' and 'question'.
20
- """
21
- resp = requests.get(f"{API_URL}/questions", timeout=15)
22
- resp.raise_for_status()
23
- return resp.json()
24
 
 
 
25
 
26
- @tool
27
- def fetch_random_question() -> dict:
28
- """
29
- Fetch a single random GAIA question.
30
 
31
- Returns:
32
- dict: A dict with keys 'task_id' and 'question'.
33
  """
34
- resp = requests.get(f"{API_URL}/random-question", timeout=15)
35
- resp.raise_for_status()
36
- return resp.json()
 
37
 
 
 
 
38
 
39
- @tool
40
- def submit_answers(username: str, agent_code: str, answers: list) -> dict:
41
- """
42
- Submit the agent's answers to GAIA and get the scoring.
 
43
 
44
- Args:
45
- username (str): The Hugging Face username identifying the submission.
46
- agent_code (str): URL to your Space code repository for verification.
47
- answers (list): A list of dicts, each with 'task_id' and 'submitted_answer'.
48
 
49
- Returns:
50
- dict: A dict containing 'score', 'correct_count', 'total_attempted', 'message', etc.
51
- """
52
- payload = {
53
- "username": username,
54
- "agent_code": agent_code,
55
- "answers": answers
56
- }
57
- resp = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
58
- resp.raise_for_status()
59
- return resp.json()
60
 
 
 
61
 
62
- def create_agent() -> CodeAgent:
63
- """
64
- Build and return a configured CodeAgent using OpenAI GPT-3.5 Turbo.
65
- Requires OPENAI_API_KEY in the environment.
 
 
 
 
 
 
 
 
 
 
66
 
67
- Returns:
68
- CodeAgent: An instance of CodeAgent configured with the GAIA tools.
69
- """
70
- # Use 'model_id' to match the OpenAIServerModel signature
71
- model = OpenAIServerModel(model_id="gpt-3.5-turbo")
72
- agent = CodeAgent(
73
- tools=[fetch_questions, fetch_random_question, submit_answers],
74
- model=model,
75
- prompt_template=(
76
- "Here is a GAIA question:\n"
77
- "{question}\n"
78
- "Provide ONLY the exact answer (exact-match), with no extra text."
79
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
- return agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
  import requests
4
+ import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ from agent import create_agent, fetch_random_question
 
 
 
 
 
7
 
8
+ # --- Constants ---
9
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
 
 
 
11
 
12
def _answer_questions(agent, questions):
    """Run ``agent`` on every well-formed question and collect its answers.

    Returns a ``(results, payload)`` pair: ``results`` holds the rows shown
    in the UI table, ``payload`` holds the entries sent to the scoring API.
    """
    results = []
    payload = []
    for q in questions:
        # Entries that are not dicts cannot carry a task id / question pair.
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question_text = q.get("question")
        if not task_id or not question_text:
            continue
        try:
            # NOTE(review): the ``question=`` keyword is kept as in the
            # original call; confirm it matches the ``run`` signature of the
            # agent returned by ``create_agent`` in agent.py.
            answer = agent.run(question=question_text)
        except Exception as e:
            # Best effort: one failing question must not abort the whole run,
            # so the error text is recorded as that question's answer.
            answer = f"ERROR: {e}"
        results.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
        payload.append({"task_id": task_id, "submitted_answer": answer})
    return results, payload


def run_and_submit_all(profile):
    """
    Fetch all questions, run the agent on them, submit all answers,
    and report the results.

    Args:
        profile: Object exposing a ``username`` attribute for the logged-in
            user, or ``None`` when nobody is logged in.

    Returns:
        tuple: ``(status, table)``. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per answered
        question, or ``None`` when the run stopped before any question was
        processed (no login, agent creation failed, fetching failed, or no
        questions were returned).
    """
    if profile is None:
        return "Please login to Hugging Face with the button.", None
    username = profile.username

    # Instantiate the agent
    try:
        agent = create_agent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # When SPACE_ID is not set this yields ".../spaces/None/tree/main";
    # that original behaviour is kept unchanged.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A payload that is not a non-empty list contains nothing to answer.
    if not isinstance(questions, list) or not questions:
        return "No questions fetched.", None

    # Run agent on each question
    results, payload = _answer_questions(agent, questions)
    if not payload:
        # Every entry was malformed; do not send an empty submission.
        return "Agent did not produce any answers to submit.", pd.DataFrame(results)

    # Submit answers
    submit_payload = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submit_payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        # The answers table is still returned so the run is not lost.
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)
73
+
74
+
75
def test_random_question(profile):
    """
    Fetch a random GAIA question and return it with the agent's answer.

    Args:
        profile: Logged-in user's profile, or ``None`` when nobody is logged
            in. It is only checked for presence here.

    Returns:
        tuple: ``(question, answer)`` on success. On any failure the first
        item carries the message and the second item is an empty string.
    """
    if profile is None:
        return "Please login to test.", ""
    try:
        q = fetch_random_question()
        question_text = q.get("question", "")
        if not question_text:
            # Nothing to ask: skip building the agent and running it on "".
            return "Error: no question text returned.", ""
        agent = create_agent()
        # NOTE(review): the ``question=`` keyword is kept as in the original
        # call; confirm it matches the agent's ``run`` signature in agent.py.
        ans = agent.run(question=question_text)
        return question_text, ans
    except Exception as e:
        # UI boundary: show the failure in the question box instead of raising.
        return f"Error: {e}", ""
88
+
89
+
90
+ # --- Gradio Interface ---
91
from typing import Optional

with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent Evaluation Runner")
    gr.Markdown(
        """
        **Istruzioni:**
        1. Clone questo space e definisci la logica in agent.py.
        2. Effettua il login con il tuo account Hugging Face.
        3. Usa 'Run Evaluation & Submit All Answers' o 'Test Random Question'.
        """
    )

    # OAuth login. The profile is NOT read from the button's value: Gradio
    # injects it into any handler parameter annotated with OAuthProfile
    # (None when the visitor is not logged in).
    login = gr.LoginButton()

    run_all = gr.Button("Run Evaluation & Submit All Answers")
    test = gr.Button("Test Random Question")

    status = gr.Textbox(label="Status / Risultato", lines=5, interactive=False)
    table = gr.DataFrame(label="Risultati Completi", wrap=True)
    qbox = gr.Textbox(label="Domanda Casuale", lines=3, interactive=False)
    abox = gr.Textbox(label="Risposta Agente", lines=3, interactive=False)

    def _run_all_for_user(profile: Optional[gr.OAuthProfile]):
        """Forward the injected OAuth profile to ``run_and_submit_all``."""
        return run_and_submit_all(profile)

    def _test_random_for_user(profile: Optional[gr.OAuthProfile]):
        """Forward the injected OAuth profile to ``test_random_question``."""
        return test_random_question(profile)

    # No ``inputs``: the only handler argument is the injected profile.
    run_all.click(fn=_run_all_for_user, outputs=[status, table])
    test.click(fn=_test_random_for_user, outputs=[qbox, abox])

if __name__ == "__main__":
    # Launch once; the original listed this call twice.
    demo.launch(debug=True, share=False)