Toumaima committed on
Commit
1dc8b46
·
verified ·
1 Parent(s): eaf5277

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -13
app.py CHANGED
@@ -31,12 +31,6 @@ class BasicAgent:
31
  to be put in the list is a number or a string."""
32
  )
33
 
34
-
35
- @app.get("/questions")
36
- def get_questions():
37
- with open("questions.json", "r") as f:
38
- return JSONResponse(content=json.load(f))
39
-
40
  def format_final_answer(self, answer: str) -> str:
41
  cleaned = " ".join(answer.split())
42
  return f"FINAL ANSWER: {cleaned}"
@@ -142,13 +136,11 @@ def question_scorer(model_answer: str, ground_truth: str) -> bool:
142
  if model_answer is None:
143
  model_answer = "None"
144
 
145
- # Case 1: Ground truth is numeric
146
  if is_float(ground_truth):
147
  print(f"Evaluating '{model_answer}' as a number.")
148
  normalized = normalize_number_str(model_answer)
149
  return normalized == float(ground_truth) if normalized is not None else False
150
 
151
- # Case 2: Ground truth is a list
152
  elif any(char in ground_truth for char in [",", ";"]):
153
  print(f"Evaluating '{model_answer}' as a comma/semicolon-separated list.")
154
  gt_elems = split_string(ground_truth)
@@ -168,11 +160,10 @@ def question_scorer(model_answer: str, ground_truth: str) -> bool:
168
  return False
169
  return True
170
 
171
- # Case 3: Ground truth is a plain string
172
  else:
173
  print(f"Evaluating '{model_answer}' as a string.")
174
  return normalize_str(model_answer) == normalize_str(ground_truth)
175
-
176
  def run_and_submit_all(profile: gr.OAuthProfile | None):
177
  space_id = os.getenv("SPACE_ID")
178
  if profile:
@@ -217,7 +208,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
217
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
218
 
219
  if not answers_payload:
220
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
221
  submission_data = {
222
  "username": username.strip(),
223
  "agent_code": agent_code,
@@ -239,7 +230,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
239
  return final_status, pd.DataFrame(results_log)
240
  except Exception as e:
241
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
242
- print(question_scorer("FINAL ANSWER: right",submitted_answer))
243
 
244
  # --- Build Gradio Interface ---
245
  with gr.Blocks() as demo:
@@ -256,4 +246,4 @@ with gr.Blocks() as demo:
256
 
257
  if __name__ == "__main__":
258
  print("Launching Gradio Interface for Basic Agent Evaluation...")
259
- demo.launch(debug=True, share=False)
 
31
  to be put in the list is a number or a string."""
32
  )
33
 
 
 
 
 
 
 
34
  def format_final_answer(self, answer: str) -> str:
35
  cleaned = " ".join(answer.split())
36
  return f"FINAL ANSWER: {cleaned}"
 
136
  if model_answer is None:
137
  model_answer = "None"
138
 
 
139
  if is_float(ground_truth):
140
  print(f"Evaluating '{model_answer}' as a number.")
141
  normalized = normalize_number_str(model_answer)
142
  return normalized == float(ground_truth) if normalized is not None else False
143
 
 
144
  elif any(char in ground_truth for char in [",", ";"]):
145
  print(f"Evaluating '{model_answer}' as a comma/semicolon-separated list.")
146
  gt_elems = split_string(ground_truth)
 
160
  return False
161
  return True
162
 
 
163
  else:
164
  print(f"Evaluating '{model_answer}' as a string.")
165
  return normalize_str(model_answer) == normalize_str(ground_truth)
166
+
167
  def run_and_submit_all(profile: gr.OAuthProfile | None):
168
  space_id = os.getenv("SPACE_ID")
169
  if profile:
 
208
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
209
 
210
  if not answers_payload:
211
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
212
  submission_data = {
213
  "username": username.strip(),
214
  "agent_code": agent_code,
 
230
  return final_status, pd.DataFrame(results_log)
231
  except Exception as e:
232
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
233
 
234
  # --- Build Gradio Interface ---
235
  with gr.Blocks() as demo:
 
246
 
247
  if __name__ == "__main__":
248
  print("Launching Gradio Interface for Basic Agent Evaluation...")
249
+ demo.launch(debug=True, share=False)