Toumaima committed on
Commit
70a778a
·
verified ·
1 Parent(s): 2c64e68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -2
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import gradio as gr
3
  import requests
 
 
4
  import pandas as pd
5
  from huggingface_hub import login
6
  import re
@@ -9,6 +11,7 @@ from groq import Groq
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
12
  # --- Basic Agent Definition ---
13
  class BasicAgent:
14
  def __init__(self):
@@ -100,7 +103,71 @@ class BasicAgent:
100
  return self.solve_riddle(question)
101
  return self.query_groq(question)
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
 
 
104
  def run_and_submit_all(profile: gr.OAuthProfile | None):
105
  space_id = os.getenv("SPACE_ID")
106
  if profile:
@@ -143,10 +210,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
143
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
144
  except Exception as e:
145
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
146
-
147
  if not answers_payload:
148
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
149
-
150
  submission_data = {
151
  "username": username.strip(),
152
  "agent_code": agent_code,
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ import string
5
+ import warnings
6
  import pandas as pd
7
  from huggingface_hub import login
8
  import re
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+
15
  # --- Basic Agent Definition ---
16
  class BasicAgent:
17
  def __init__(self):
 
103
  return self.solve_riddle(question)
104
  return self.query_groq(question)
105
 
106
+ def question_scorer(model_answer: str, ground_truth: str) -> bool:
107
+ def normalize_str(input_str, remove_punct=True) -> str:
108
+ no_spaces = re.sub(r"\s", "", input_str)
109
+ if remove_punct:
110
+ translator = str.maketrans("", "", string.punctuation)
111
+ return no_spaces.lower().translate(translator)
112
+ else:
113
+ return no_spaces.lower()
114
+
115
+ def normalize_number_str(number_str: str) -> float | None:
116
+ for char in ["$", "%", ","]:
117
+ number_str = number_str.replace(char, "")
118
+ try:
119
+ return float(number_str)
120
+ except ValueError:
121
+ print(f"String '{number_str}' cannot be normalized to number.")
122
+ return None
123
+
124
+ def split_string(s: str, char_list: list[str] = [",", ";"]) -> list[str]:
125
+ pattern = f"[{''.join(map(re.escape, char_list))}]"
126
+ return [elem.strip() for elem in re.split(pattern, s)]
127
+
128
+ def is_float(val) -> bool:
129
+ try:
130
+ float(val)
131
+ return True
132
+ except ValueError:
133
+ return False
134
+
135
+ if model_answer is None:
136
+ model_answer = "None"
137
+
138
+ # Case 1: Ground truth is numeric
139
+ if is_float(ground_truth):
140
+ print(f"Evaluating '{model_answer}' as a number.")
141
+ normalized = normalize_number_str(model_answer)
142
+ return normalized == float(ground_truth) if normalized is not None else False
143
+
144
+ # Case 2: Ground truth is a list
145
+ elif any(char in ground_truth for char in [",", ";"]):
146
+ print(f"Evaluating '{model_answer}' as a comma/semicolon-separated list.")
147
+ gt_elems = split_string(ground_truth)
148
+ ma_elems = split_string(model_answer)
149
+
150
+ if len(gt_elems) != len(ma_elems):
151
+ warnings.warn("Answer lists have different lengths, returning False.", UserWarning)
152
+ return False
153
+
154
+ for ma_elem, gt_elem in zip(ma_elems, gt_elems):
155
+ if is_float(gt_elem):
156
+ normalized = normalize_number_str(ma_elem)
157
+ if normalized != float(gt_elem):
158
+ return False
159
+ else:
160
+ if normalize_str(ma_elem, remove_punct=False) != normalize_str(gt_elem, remove_punct=False):
161
+ return False
162
+ return True
163
+
164
+ # Case 3: Ground truth is a plain string
165
+ else:
166
+ print(f"Evaluating '{model_answer}' as a string.")
167
+ return normalize_str(model_answer) == normalize_str(ground_truth)
168
 
169
# Smoke check of the scorer. question_scorer takes two required arguments
# (model_answer, ground_truth); calling it with only the answer raises
# TypeError as soon as the module is loaded, so a ground truth is supplied.
print(question_scorer("FINAL ANSWER: right", "right"))
170
+
171
  def run_and_submit_all(profile: gr.OAuthProfile | None):
172
  space_id = os.getenv("SPACE_ID")
173
  if profile:
 
210
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
211
  except Exception as e:
212
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
213
+
214
  if not answers_payload:
215
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
216
+ print(question_scorer("FINAL ANSWER: right",submitted_answer))
217
  submission_data = {
218
  "username": username.strip(),
219
  "agent_code": agent_code,