Ubik80 committed
Commit 60f0482 · verified · 1 Parent(s): 91ff904
Files changed (1)
  1. app.py +62 -128
app.py CHANGED
@@ -1,139 +1,73 @@
  import os
- import gradio as gr
  import requests
- import pandas as pd
-
- from agent import create_agent, fetch_random_question
-
- # --- Constants ---
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
-
- def run_and_submit_all(profile: gr.OAuthProfile | None):
      """
-     Fetch all questions, run the SmolAgent on them, submit all answers,
-     and display the results.
      """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID")
-
-     if profile:
-         username = profile.username
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please login to Hugging Face with the button.", None
-
-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"
-
-     # 1. Instantiate SmolAgent
-     try:
-         agent = create_agent()
-         print("SmolAgent initialized.")
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-
-     # Code link for verification
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)
-
-     # 2. Fetch all questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except Exception as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-
-     # 3. Run agent on each question
-     results_log = []
-     answers_payload = []
-     print(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
-         task_id = item.get("task_id")
-         question_text = item.get("question")
-         if not task_id or question_text is None:
-             print(f"Skipping invalid item: {item}")
-             continue
-         try:
-             answer = agent.run(question=question_text)
-             answers_payload.append({"task_id": task_id, "submitted_answer": answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Answer": answer})
-         except Exception as e:
-             print(f"Error on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"ERROR: {e}"})
-
-     if not answers_payload:
-         return "Agent produced no answers.", pd.DataFrame(results_log)
-
-     # 4. Submit answers
-     payload = {"username": username, "agent_code": agent_code, "answers": answers_payload}
-     print(f"Submitting {len(answers_payload)} answers...")
-     try:
-         resp = requests.post(submit_url, json=payload, timeout=60)
-         resp.raise_for_status()
-         data = resp.json()
-         status = (
-             f"Submission Successful!\n"
-             f"User: {data.get('username')}\n"
-             f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
-             f"Message: {data.get('message')}"
-         )
-         return status, pd.DataFrame(results_log)
-     except Exception as e:
-         print(f"Submission error: {e}")
-         return f"Submission Failed: {e}", pd.DataFrame(results_log)

-
- def test_random_question(profile: gr.OAuthProfile | None):
      """
-     Fetch a random GAIA question and get the agent's answer for testing.
      """
-     if not profile:
-         return "Please login to test.", ""
-
-     try:
-         q = fetch_random_question()
-         agent = create_agent()
-         ans = agent.run(question=q.get('question', ''))
-         return q.get('question', ''), ans
-     except Exception as e:
-         print(f"Test error: {e}")
-         return f"Error: {e}", ""


- # --- Build Gradio Interface ---
- with gr.Blocks() as demo:
-     gr.Markdown("# SmolAgent Evaluation Runner")
-     gr.Markdown(
-         """
-         **Instructions:**
-         1. Clone this space and define your agent logic in agent.py.
-         2. Log in with your Hugging Face account.
-         3. Use 'Run Evaluation & Submit All Answers' or 'Test Random Question'.
-         """
      )

-     login = gr.LoginButton()
-     run_all_btn = gr.Button("Run Evaluation & Submit All Answers")
-     test_btn = gr.Button("Test Random Question")
-
-     status_box = gr.Textbox(label="Status / Result", lines=5, interactive=False)
-     results_table = gr.DataFrame(label="Full Results Table", wrap=True)
-     question_box = gr.Textbox(label="Random Question", lines=3, interactive=False)
-     answer_box = gr.Textbox(label="Agent Answer", lines=3, interactive=False)
-
-     run_all_btn.click(fn=run_and_submit_all, inputs=[login], outputs=[status_box, results_table])
-     test_btn.click(fn=test_random_question, inputs=[login], outputs=[question_box, answer_box])
-
- if __name__ == "__main__":
-     demo.launch(debug=True, share=False)
  import os
  import requests
+ from smolagents import CodeAgent, tool, OpenAIServerModel
+
+ # ------------------------
+ # Constants
+ # ------------------------
+ API_URL = "https://agents-course-unit4-scoring.hf.space"
+
+ # ------------------------
+ # Tool definitions
+ # ------------------------
+ @tool
+ def fetch_questions() -> list:
      """
+     Fetch the full list of GAIA evaluation questions.
      """
+     response = requests.get(f"{API_URL}/questions", timeout=15)
+     response.raise_for_status()
+     return response.json()
+
+ @tool
+ def fetch_random_question() -> dict:
      """
+     Fetch a single random GAIA question.
      """
+     response = requests.get(f"{API_URL}/random-question", timeout=15)
+     response.raise_for_status()
+     return response.json()
+
+ @tool
+ def fetch_file(task_id: str) -> bytes:
+     """
+     Download a file associated with a given task_id.
+
+     Args:
+         task_id: Identifier of the task whose attached file should be downloaded.
+     """
+     response = requests.get(f"{API_URL}/files/{task_id}", timeout=15)
+     response.raise_for_status()
+     return response.content
+
+ @tool
+ def submit_answers(username: str, agent_code: str, answers: list) -> dict:
+     """
+     Submit the agent's answers to GAIA and return the scoring.
+
+     Args:
+         username: Hugging Face username used for the submission.
+         agent_code: URL of the Space code, used for verification.
+         answers: List of {"task_id", "submitted_answer"} dictionaries.
+     """
+     payload = {
+         "username": username,
+         "agent_code": agent_code,
+         "answers": answers
+     }
+     response = requests.post(f"{API_URL}/submit", json=payload, timeout=60)
+     response.raise_for_status()
+     return response.json()
+
+ # ------------------------
+ # Agent factory
+ # ------------------------
+ def create_agent() -> CodeAgent:
+     """
+     Factory that returns a configured CodeAgent instance.
+     Requires OPENAI_API_KEY in environment.
+     """
+     # Initialize the LLM with OpenAI API
+     llm = OpenAIServerModel(
+         model_id=os.getenv("OPENAI_MODEL_ID", "gpt-3.5-turbo"),
+         api_key=os.getenv("OPENAI_API_KEY")
      )
+     # Create agent with defined tools
+     agent = CodeAgent(
+         tools=[fetch_questions, fetch_random_question, fetch_file, submit_answers],
+         model=llm
+     )
+     return agent
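With the Gradio UI and run_and_submit_all removed, the new app.py only defines tools and the create_agent() factory, so nothing executes on import. The sketch below is not part of the commit; it shows one way a driver script could reproduce the old fetch → answer → submit loop on top of the new tools. The module name `app`, the `HF_USERNAME` environment variable, and the direct calls to the @tool-wrapped functions are assumptions; the payload shapes mirror what the removed run_and_submit_all sent.

# Minimal driver sketch (assumes OPENAI_API_KEY is set and this file imports as `app`)
import os
from app import create_agent, fetch_questions, submit_answers

agent = create_agent()

# @tool-decorated functions remain callable directly, so the driver
# can fetch questions without going through the agent.
questions = fetch_questions()

answers = []
for item in questions:
    task_id = item.get("task_id")
    question = item.get("question")
    if not task_id or question is None:
        continue
    # CodeAgent.run takes the task text and returns the agent's final answer.
    answers.append({"task_id": task_id, "submitted_answer": agent.run(question)})

result = submit_answers(
    username=os.getenv("HF_USERNAME", "your-username"),  # hypothetical env var
    agent_code=f"https://huggingface.co/spaces/{os.getenv('SPACE_ID', '')}/tree/main",
    answers=answers,
)
print(result.get("score"), result.get("message"))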