Ubik80 committed on
Commit
9307ac3
·
verified ·
1 Parent(s): 0e6b913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -43
app.py CHANGED
@@ -11,67 +11,62 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  class BasicAgent:
13
  def __init__(self):
14
- # Initialize CodeAgent with a single custom AnswerTool to handle GAIA Level 1 questions
 
 
15
  self.agent = CodeAgent(
16
  model=None,
17
- tools=[AnswerTool()],
18
  add_base_tools=False,
19
  max_steps=1,
20
  verbosity_level=0
21
  )
22
 
23
  def __call__(self, question: str) -> str:
24
- # Directly run the agent on the question (single-step tool invocation)
25
  return self.agent.run(question)
26
 
27
 
28
- def run_and_submit_all(profile: gr.OAuthProfile | None):
29
- """
30
- Fetch all GAIA Level 1 questions, run the BasicAgent, submit answers, and display results.
31
- """
32
- space_id = os.getenv("SPACE_ID")
33
- if not profile:
34
- return "Please login to Hugging Face with the login button.", None
35
- username = getattr(profile, "username", None) or getattr(profile, "name", None)
36
  if not username:
37
- return "Login error: username not found.", None
38
 
39
- # 1. Fetch questions
40
- questions_url = f"{DEFAULT_API_URL}/questions"
41
  try:
42
- resp = requests.get(questions_url, timeout=15)
43
  resp.raise_for_status()
44
  questions = resp.json()
45
  except Exception as e:
46
  return f"Error fetching questions: {e}", None
47
 
48
- # 2. Run agent on each question
49
  agent = BasicAgent()
50
- results, payload = [], []
 
51
  for q in questions:
52
- task_id = q.get("task_id")
53
- text = q.get("question")
54
- if not task_id or not text:
55
  continue
56
  try:
57
  ans = agent(text)
58
  except Exception as e:
59
  ans = f"ERROR: {e}"
60
- results.append({"Task ID": task_id, "Question": text, "Answer": ans})
61
- payload.append({"task_id": task_id, "submitted_answer": ans})
62
 
63
  if not payload:
64
  return "Agent returned no answers.", pd.DataFrame(results)
65
 
66
- # 3. Submit answers
67
- submit_url = f"{DEFAULT_API_URL}/submit"
68
  submission = {
69
- "username": username.strip(),
70
- "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
71
- "answers": payload
72
  }
73
  try:
74
- sub_resp = requests.post(submit_url, json=submission, timeout=60)
75
  sub_resp.raise_for_status()
76
  data = sub_resp.json()
77
  status = (
@@ -86,33 +81,30 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
86
  return status, pd.DataFrame(results)
87
 
88
 
89
- def test_random_question(profile: gr.OAuthProfile | None):
90
- """
91
- Fetch a single random GAIA question and return the agent's answer.
92
- """
93
- if not profile:
94
- return "Please login to Hugging Face with the login button.", ""
95
  try:
96
  q = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15).json()
97
- question = q.get("question", "")
98
  ans = BasicAgent()(question)
99
  return question, ans
100
  except Exception as e:
101
  return f"Error during test: {e}", ""
102
 
103
- # --- Gradio Interface ---
104
  with gr.Blocks() as demo:
105
  gr.Markdown("# Basic Agent Evaluation Runner")
106
  gr.Markdown(
107
  """
108
  **Instructions:**
109
- 1. Clone this space and define your agent logic in `tools.py`.
110
- 2. Log in with your Hugging Face account using the login button below.
111
- 3. Use **Run Evaluation & Submit All Answers** or **Test Random Question**.
112
  """
113
  )
114
 
115
- login = gr.LoginButton()
116
  run_btn = gr.Button("Run Evaluation & Submit All Answers")
117
  test_btn = gr.Button("Test Random Question")
118
 
@@ -121,9 +113,8 @@ with gr.Blocks() as demo:
121
  question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
122
  answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)
123
 
124
- # Wire buttons to callbacks; LoginButton auto-passes profile
125
- run_btn.click(fn=run_and_submit_all, inputs=[login], outputs=[status_out, table_out])
126
- test_btn.click(fn=test_random_question, inputs=[login], outputs=[question_out, answer_out])
127
 
128
  if __name__ == "__main__":
129
  demo.launch(debug=True, share=False)
 
11
 
12
  class BasicAgent:
13
  def __init__(self):
14
+ # Use a custom AnswerTool to handle GAIA Level 1 questions exactly
15
+ answer_tool = AnswerTool()
16
+ # Initialize CodeAgent with only the AnswerTool, no code execution
17
  self.agent = CodeAgent(
18
  model=None,
19
+ tools=[answer_tool],
20
  add_base_tools=False,
21
  max_steps=1,
22
  verbosity_level=0
23
  )
24
 
25
  def __call__(self, question: str) -> str:
26
+ # Single-step execution: call the AnswerTool
27
  return self.agent.run(question)
28
 
29
 
30
+ def run_and_submit_all(username):
31
+ # Username provided manually by the user
 
 
 
 
 
 
32
  if not username:
33
+ return "Please enter your Hugging Face username.", None
34
 
35
+ # Fetch questions
 
36
  try:
37
+ resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
38
  resp.raise_for_status()
39
  questions = resp.json()
40
  except Exception as e:
41
  return f"Error fetching questions: {e}", None
42
 
43
+ # Run agent on all questions
44
  agent = BasicAgent()
45
+ results = []
46
+ payload = []
47
  for q in questions:
48
+ tid = q.get('task_id')
49
+ text = q.get('question')
50
+ if not (tid and text):
51
  continue
52
  try:
53
  ans = agent(text)
54
  except Exception as e:
55
  ans = f"ERROR: {e}"
56
+ results.append({'Task ID': tid, 'Question': text, 'Answer': ans})
57
+ payload.append({'task_id': tid, 'submitted_answer': ans})
58
 
59
  if not payload:
60
  return "Agent returned no answers.", pd.DataFrame(results)
61
 
62
+ # Submit answers
 
63
  submission = {
64
+ 'username': username,
65
+ 'agent_code': f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
66
+ 'answers': payload
67
  }
68
  try:
69
+ sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
70
  sub_resp.raise_for_status()
71
  data = sub_resp.json()
72
  status = (
 
81
  return status, pd.DataFrame(results)
82
 
83
 
84
+ def test_random_question(username):
85
+ if not username:
86
+ return "Please enter your Hugging Face username.", ""
 
 
 
87
  try:
88
  q = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15).json()
89
+ question = q.get('question', '')
90
  ans = BasicAgent()(question)
91
  return question, ans
92
  except Exception as e:
93
  return f"Error during test: {e}", ""
94
 
95
+ # --- Gradio UI ---
96
  with gr.Blocks() as demo:
97
  gr.Markdown("# Basic Agent Evaluation Runner")
98
  gr.Markdown(
99
  """
100
  **Instructions:**
101
+ 1. Enter your Hugging Face username.
102
+ 2. Use **Test Random Question** to check a single question.
103
+ 3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
104
  """
105
  )
106
 
107
+ username_input = gr.Textbox(label="Hugging Face Username", placeholder="your-username")
108
  run_btn = gr.Button("Run Evaluation & Submit All Answers")
109
  test_btn = gr.Button("Test Random Question")
110
 
 
113
  question_out = gr.Textbox(label="Random Question", lines=3, interactive=False)
114
  answer_out = gr.Textbox(label="Agent Answer", lines=3, interactive=False)
115
 
116
+ run_btn.click(fn=run_and_submit_all, inputs=[username_input], outputs=[status_out, table_out])
117
+ test_btn.click(fn=test_random_question, inputs=[username_input], outputs=[question_out, answer_out])
 
118
 
119
  if __name__ == "__main__":
120
  demo.launch(debug=True, share=False)