Ubik80 committed on
Commit
bee5328
·
verified ·
1 Parent(s): d3f09f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -62
app.py CHANGED
@@ -1,73 +1,123 @@
1
  import os
 
2
  import requests
3
- from smolagents import CodeAgent, tool, OpenAIServerModel
4
-
5
# ------------------------
# Constants
# ------------------------
# Base URL of the scoring service; the tool functions in this file build
# their endpoint URLs by appending a path to it.
API_URL = "https://agents-course-unit4-scoring.hf.space"
9
-
10
- # ------------------------
11
- # Tool definitions
12
- # ------------------------
13
@tool
def fetch_questions() -> list:
    """
    Fetch the full list of GAIA evaluation questions.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    questions_endpoint = f"{API_URL}/questions"
    reply = requests.get(questions_endpoint, timeout=15)
    # Turn HTTP 4xx/5xx into an exception rather than decoding an error body.
    reply.raise_for_status()
    return reply.json()
21
 
22
@tool
def fetch_random_question() -> dict:
    """
    Fetch a single random GAIA question.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    random_endpoint = f"{API_URL}/random-question"
    reply = requests.get(random_endpoint, timeout=15)
    # Turn HTTP 4xx/5xx into an exception rather than decoding an error body.
    reply.raise_for_status()
    return reply.json()
30
 
31
@tool
def fetch_file(task_id: str) -> bytes:
    """
    Download a file associated with a given task_id.

    Args:
        task_id: Identifier of the task whose attached file should be downloaded.

    Returns:
        The raw bytes of the HTTP response body.
    """
    # The Args section above is not optional: the @tool decorator derives the
    # tool's input schema from the docstring and needs a description for
    # every parameter.
    response = requests.get(f"{API_URL}/files/{task_id}", timeout=15)
    # Turn HTTP 4xx/5xx into an exception instead of returning an error page.
    response.raise_for_status()
    return response.content
39
 
40
@tool
def submit_answers(username: str, agent_code: str, answers: list) -> dict:
    """
    Submit the agent's answers to GAIA and return the scoring.

    Args:
        username: Name of the user the submission is recorded under.
        agent_code: Link or identifier of the agent's source code sent along with the answers.
        answers: List of answer entries to be scored.

    Returns:
        The decoded JSON body of the scoring service's response.
    """
    # The Args section above is not optional: the @tool decorator derives the
    # tool's input schema from the docstring and needs a description for
    # every parameter.
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    # Longer timeout than the GET tools use for their requests.
    response = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
    response.raise_for_status()
    return response.json()
53
-
54
- # ------------------------
55
- # Agent factory
56
- # ------------------------
57
def create_agent() -> CodeAgent:
    """
    Build and return a ready-to-use CodeAgent.

    The model id is read from OPENAI_MODEL_ID (falling back to
    "gpt-3.5-turbo") and the API key from OPENAI_API_KEY, both taken from
    the process environment.
    """
    model_name = os.getenv("OPENAI_MODEL_ID", "gpt-3.5-turbo")
    openai_key = os.getenv("OPENAI_API_KEY")
    language_model = OpenAIServerModel(model_id=model_name, api_key=openai_key)

    # Every tool defined in this module is handed to the agent.
    toolbox = [fetch_questions, fetch_random_question, fetch_file, submit_answers]
    return CodeAgent(tools=toolbox, model=language_model)
 
 
 
 
 
 
 
 
 
73
 
 
 
 
1
  import os
2
+ import gradio as gr
3
  import requests
4
+ import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ from agent import create_agent, fetch_random_question
 
 
 
 
 
 
 
7
 
8
# --- Constants ---
# Base URL of the scoring service; the questions and submit endpoints used
# in this file are built by appending a path to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
10
 
11
+
12
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the SmolAgent on them, submit all answers,
    and display the results.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message;
        ``table`` is a pandas DataFrame with one row per processed question,
        or None when the run stopped before any question was processed.
    """
    if not profile:
        print("User not logged in.")
        return "Please login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = create_agent()
        print("SmolAgent initialized.")
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # Without SPACE_ID the link below contains the literal "None"; the
        # submission is still attempted, but the problem is made visible.
        print("Warning: SPACE_ID is not set; the agent code URL will not point to a real Space.")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code URL: {agent_code}")

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
        if not questions:
            return "No questions fetched.", None
        print(f"Fetched {len(questions)} questions.")
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    results = []
    payload = []
    for q in questions:
        # Skip malformed entries instead of crashing on `.get`.
        if not isinstance(q, dict):
            continue
        tid = q.get("task_id")
        text = q.get("question")
        if not tid or not text:
            continue
        try:
            # Pass the question positionally: the agent's `run` method names
            # its first parameter `task`, so `question=` as a keyword would
            # raise and every answer would become an error string.
            ans = agent.run(text)
        except Exception as e:
            # One failing question must not abort the whole evaluation run.
            ans = f"ERROR: {e}"
        payload.append({"task_id": tid, "submitted_answer": ans})
        results.append({"Task ID": tid, "Question": text, "Answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        print(f"Submission error: {e}")
        status = f"Submission Failed: {e}"

    # The results table is returned even when the submission failed.
    return status, pd.DataFrame(results)
83
+
84
+
85
def test_random_question(profile: gr.OAuthProfile | None):
    """
    Fetch a random GAIA question and return the agent's answer.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody
            is logged in.

    Returns:
        A ``(question, answer)`` tuple. When the user is not logged in or an
        error occurs, the first element carries the message and the second
        is an empty string.
    """
    if not profile:
        return "Please login to test.", ""
    try:
        q = fetch_random_question()
        question_text = q.get("question", "")
        agent = create_agent()
        # Pass the question positionally: the agent's `run` method names its
        # first parameter `task`, so `question=` as a keyword would raise.
        ans = agent.run(question_text)
        return question_text, ans
    except Exception as e:
        print(f"Test error: {e}")
        return f"Error: {e}", ""
99
+
100
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent Evaluation Runner")
    gr.Markdown(
        """
        **Istruzioni:**
        1. Clone questo space e definisci la logica in agent.py.
        2. Effettua il login con il tuo account Hugging Face.
        3. Usa 'Run Evaluation & Submit All Answers' o 'Test Random Question'.
        """
    )
    login = gr.LoginButton()
    run_all = gr.Button("Run Evaluation & Submit All Answers")
    test = gr.Button("Test Random Question")
    status = gr.Textbox(label="Status / Risultato", lines=5, interactive=False)
    table = gr.DataFrame(label="Risultati Completi", wrap=True)
    qbox = gr.Textbox(label="Domanda Casuale", lines=3, interactive=False)
    abox = gr.Textbox(label="Risposta Agente", lines=3, interactive=False)

    # Both handlers declare a `gr.OAuthProfile | None` parameter, which Gradio
    # fills in automatically from the session. The login button must not be
    # listed as an input: it would be passed as an extra positional argument
    # to handlers that accept only the profile.
    run_all.click(fn=run_and_submit_all, outputs=[status, table])
    test.click(fn=test_random_question, outputs=[qbox, abox])

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)