Ubik80 committed on
Commit
0e6b913
·
verified ·
1 Parent(s): e0f838b
Files changed (1) hide show
  1. app.py +51 -41
app.py CHANGED
@@ -3,69 +3,75 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
 
6
- from tools import FinalAnswerTool
7
- from smolagents import CodeAgent, OpenAIServerModel
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  class BasicAgent:
13
  def __init__(self):
14
- # Use GPT-4o; ensure your OpenAI API key has access
15
- model = OpenAIServerModel(model_id="gpt-4o")
16
- final_tool = FinalAnswerTool()
17
  self.agent = CodeAgent(
18
- model=model,
19
- tools=[final_tool],
20
- max_steps=3,
21
- verbosity_level=1
 
22
  )
23
 
24
  def __call__(self, question: str) -> str:
25
- # Positional call
26
  return self.agent.run(question)
27
 
28
 
29
- def run_and_submit_all(username):
30
- # Username provided manually by the user
 
 
 
 
 
 
31
  if not username:
32
- return "Please enter your Hugging Face username.", None
33
 
34
- # Fetch questions
 
35
  try:
36
- resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
37
  resp.raise_for_status()
38
  questions = resp.json()
39
  except Exception as e:
40
  return f"Error fetching questions: {e}", None
41
 
42
- # Run agent
43
  agent = BasicAgent()
44
- results = []
45
- payload = []
46
  for q in questions:
47
- tid = q.get('task_id')
48
- text = q.get('question')
49
- if not (tid and text):
50
  continue
51
  try:
52
  ans = agent(text)
53
  except Exception as e:
54
  ans = f"ERROR: {e}"
55
- results.append({'Task ID': tid, 'Question': text, 'Answer': ans})
56
- payload.append({'task_id': tid, 'submitted_answer': ans})
57
 
58
  if not payload:
59
  return "Agent returned no answers.", pd.DataFrame(results)
60
 
61
- # Submit
 
62
  submission = {
63
- 'username': username,
64
- 'agent_code': f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
65
- 'answers': payload
66
  }
67
  try:
68
- sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
69
  sub_resp.raise_for_status()
70
  data = sub_resp.json()
71
  status = (
@@ -80,30 +86,33 @@ def run_and_submit_all(username):
80
  return status, pd.DataFrame(results)
81
 
82
 
83
- def test_random_question(username):
84
- # Use provided username for auth consistency
85
- if not username:
86
- return "Please enter your Hugging Face username.", ""
 
 
87
  try:
88
  q = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15).json()
89
- ans = BasicAgent()(q.get('question', ''))
90
- return q.get('question', ''), ans
 
91
  except Exception as e:
92
  return f"Error during test: {e}", ""
93
 
94
- # --- Gradio UI ---
95
  with gr.Blocks() as demo:
96
  gr.Markdown("# Basic Agent Evaluation Runner")
97
  gr.Markdown(
98
  """
99
  **Instructions:**
100
- 1. Enter your Hugging Face username in the box.
101
- 2. Use **Test Random Question** to check a single question.
102
- 3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
103
  """
104
  )
105
 
106
- username_input = gr.Textbox(label="Hugging Face Username", placeholder="your-username")
107
  run_btn = gr.Button("Run Evaluation & Submit All Answers")
108
  test_btn = gr.Button("Test Random Question")
109
 
@@ -112,8 +121,9 @@ with gr.Blocks() as demo:
112
  question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
113
  answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)
114
 
115
- run_btn.click(fn=run_and_submit_all, inputs=[username_input], outputs=[status_out, table_out])
116
- test_btn.click(fn=test_random_question, inputs=[username_input], outputs=[question_out, answer_out])
 
117
 
118
  if __name__ == "__main__":
119
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import pandas as pd
5
 
6
+ from tools import AnswerTool
7
+ from smolagents import CodeAgent
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  class BasicAgent:
13
  def __init__(self):
14
+ # Initialize CodeAgent with a single custom AnswerTool to handle GAIA Level 1 questions
 
 
15
  self.agent = CodeAgent(
16
+ model=None,
17
+ tools=[AnswerTool()],
18
+ add_base_tools=False,
19
+ max_steps=1,
20
+ verbosity_level=0
21
  )
22
 
23
  def __call__(self, question: str) -> str:
24
+ # Directly run the agent on the question (single-step tool invocation)
25
  return self.agent.run(question)
26
 
27
 
28
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
29
+ """
30
+ Fetch all GAIA Level 1 questions, run the BasicAgent, submit answers, and display results.
31
+ """
32
+ space_id = os.getenv("SPACE_ID")
33
+ if not profile:
34
+ return "Please login to Hugging Face with the login button.", None
35
+ username = getattr(profile, "username", None) or getattr(profile, "name", None)
36
  if not username:
37
+ return "Login error: username not found.", None
38
 
39
+ # 1. Fetch questions
40
+ questions_url = f"{DEFAULT_API_URL}/questions"
41
  try:
42
+ resp = requests.get(questions_url, timeout=15)
43
  resp.raise_for_status()
44
  questions = resp.json()
45
  except Exception as e:
46
  return f"Error fetching questions: {e}", None
47
 
48
+ # 2. Run agent on each question
49
  agent = BasicAgent()
50
+ results, payload = [], []
 
51
  for q in questions:
52
+ task_id = q.get("task_id")
53
+ text = q.get("question")
54
+ if not task_id or not text:
55
  continue
56
  try:
57
  ans = agent(text)
58
  except Exception as e:
59
  ans = f"ERROR: {e}"
60
+ results.append({"Task ID": task_id, "Question": text, "Answer": ans})
61
+ payload.append({"task_id": task_id, "submitted_answer": ans})
62
 
63
  if not payload:
64
  return "Agent returned no answers.", pd.DataFrame(results)
65
 
66
+ # 3. Submit answers
67
+ submit_url = f"{DEFAULT_API_URL}/submit"
68
  submission = {
69
+ "username": username.strip(),
70
+ "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
71
+ "answers": payload
72
  }
73
  try:
74
+ sub_resp = requests.post(submit_url, json=submission, timeout=60)
75
  sub_resp.raise_for_status()
76
  data = sub_resp.json()
77
  status = (
 
86
  return status, pd.DataFrame(results)
87
 
88
 
89
def test_random_question(profile: gr.OAuthProfile | None):
    """
    Fetch a single random GAIA question and return the agent's answer.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in. Only its truthiness is checked here.

    Returns:
        A ``(question, answer)`` tuple of strings. When the user is not
        logged in, or the request or the agent fails, the first element
        carries the message and the second element is an empty string.
    """
    if not profile:
        return "Please login to Hugging Face with the login button.", ""
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        # Surface HTTP errors explicitly (as the /questions fetch in this file
        # does) instead of trying to parse an error page as a question.
        resp.raise_for_status()
        question = resp.json().get("question", "")
        if not question:
            # Do not spend an agent run on a missing or empty question.
            return "Error during test: no question returned by the API.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        # UI boundary: report any failure as text rather than crashing the callback.
        return f"Error during test: {e}", ""
102
 
103
+ # --- Gradio Interface ---
104
  with gr.Blocks() as demo:
105
  gr.Markdown("# Basic Agent Evaluation Runner")
106
  gr.Markdown(
107
  """
108
  **Instructions:**
109
+ 1. Clone this space and define your agent logic in `tools.py`.
110
+ 2. Log in with your Hugging Face account using the login button below.
111
+ 3. Use **Run Evaluation & Submit All Answers** or **Test Random Question**.
112
  """
113
  )
114
 
115
+ login = gr.LoginButton()
116
  run_btn = gr.Button("Run Evaluation & Submit All Answers")
117
  test_btn = gr.Button("Test Random Question")
118
 
 
121
  question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
122
  answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)
123
 
124
+ # Wire buttons to callbacks; LoginButton auto-passes profile
125
+ run_btn.click(fn=run_and_submit_all, inputs=[login], outputs=[status_out, table_out])
126
+ test_btn.click(fn=test_random_question, inputs=[login], outputs=[question_out, answer_out])
127
 
128
  if __name__ == "__main__":
129
  demo.launch(debug=True, share=False)