Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
|
|
|
|
4 |
import pandas as pd
|
5 |
from huggingface_hub import login
|
6 |
import re
|
@@ -9,6 +11,7 @@ from groq import Groq
|
|
9 |
# --- Constants ---
|
10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
11 |
|
|
|
12 |
# --- Basic Agent Definition ---
|
13 |
class BasicAgent:
|
14 |
def __init__(self):
|
@@ -100,7 +103,71 @@ class BasicAgent:
|
|
100 |
return self.solve_riddle(question)
|
101 |
return self.query_groq(question)
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
|
|
|
|
104 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
105 |
space_id = os.getenv("SPACE_ID")
|
106 |
if profile:
|
@@ -143,10 +210,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
143 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
144 |
except Exception as e:
|
145 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
146 |
-
|
147 |
if not answers_payload:
|
148 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
149 |
-
|
150 |
submission_data = {
|
151 |
"username": username.strip(),
|
152 |
"agent_code": agent_code,
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
+
import string
|
5 |
+
import warnings
|
6 |
import pandas as pd
|
7 |
from huggingface_hub import login
|
8 |
import re
|
|
|
11 |
# --- Constants ---
|
12 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
13 |
|
14 |
+
|
15 |
# --- Basic Agent Definition ---
|
16 |
class BasicAgent:
|
17 |
def __init__(self):
|
|
|
103 |
return self.solve_riddle(question)
|
104 |
return self.query_groq(question)
|
105 |
|
106 |
+
def question_scorer(model_answer: str, ground_truth: str) -> bool:
|
107 |
+
def normalize_str(input_str, remove_punct=True) -> str:
|
108 |
+
no_spaces = re.sub(r"\s", "", input_str)
|
109 |
+
if remove_punct:
|
110 |
+
translator = str.maketrans("", "", string.punctuation)
|
111 |
+
return no_spaces.lower().translate(translator)
|
112 |
+
else:
|
113 |
+
return no_spaces.lower()
|
114 |
+
|
115 |
+
def normalize_number_str(number_str: str) -> float | None:
|
116 |
+
for char in ["$", "%", ","]:
|
117 |
+
number_str = number_str.replace(char, "")
|
118 |
+
try:
|
119 |
+
return float(number_str)
|
120 |
+
except ValueError:
|
121 |
+
print(f"String '{number_str}' cannot be normalized to number.")
|
122 |
+
return None
|
123 |
+
|
124 |
+
def split_string(s: str, char_list: list[str] = [",", ";"]) -> list[str]:
|
125 |
+
pattern = f"[{''.join(map(re.escape, char_list))}]"
|
126 |
+
return [elem.strip() for elem in re.split(pattern, s)]
|
127 |
+
|
128 |
+
def is_float(val) -> bool:
|
129 |
+
try:
|
130 |
+
float(val)
|
131 |
+
return True
|
132 |
+
except ValueError:
|
133 |
+
return False
|
134 |
+
|
135 |
+
if model_answer is None:
|
136 |
+
model_answer = "None"
|
137 |
+
|
138 |
+
# Case 1: Ground truth is numeric
|
139 |
+
if is_float(ground_truth):
|
140 |
+
print(f"Evaluating '{model_answer}' as a number.")
|
141 |
+
normalized = normalize_number_str(model_answer)
|
142 |
+
return normalized == float(ground_truth) if normalized is not None else False
|
143 |
+
|
144 |
+
# Case 2: Ground truth is a list
|
145 |
+
elif any(char in ground_truth for char in [",", ";"]):
|
146 |
+
print(f"Evaluating '{model_answer}' as a comma/semicolon-separated list.")
|
147 |
+
gt_elems = split_string(ground_truth)
|
148 |
+
ma_elems = split_string(model_answer)
|
149 |
+
|
150 |
+
if len(gt_elems) != len(ma_elems):
|
151 |
+
warnings.warn("Answer lists have different lengths, returning False.", UserWarning)
|
152 |
+
return False
|
153 |
+
|
154 |
+
for ma_elem, gt_elem in zip(ma_elems, gt_elems):
|
155 |
+
if is_float(gt_elem):
|
156 |
+
normalized = normalize_number_str(ma_elem)
|
157 |
+
if normalized != float(gt_elem):
|
158 |
+
return False
|
159 |
+
else:
|
160 |
+
if normalize_str(ma_elem, remove_punct=False) != normalize_str(gt_elem, remove_punct=False):
|
161 |
+
return False
|
162 |
+
return True
|
163 |
+
|
164 |
+
# Case 3: Ground truth is a plain string
|
165 |
+
else:
|
166 |
+
print(f"Evaluating '{model_answer}' as a string.")
|
167 |
+
return normalize_str(model_answer) == normalize_str(ground_truth)
|
168 |
|
169 |
+
# Debug smoke check of the scorer. The original call omitted the required
# ``ground_truth`` argument, which raises TypeError as soon as the module is
# loaded. NOTE(review): "right" is an assumed reference answer - confirm the
# intended value. With it this prints False, because the "FINAL ANSWER:" prefix
# is not stripped before comparison.
print(question_scorer("FINAL ANSWER: right", "right"))
|
170 |
+
|
171 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
172 |
space_id = os.getenv("SPACE_ID")
|
173 |
if profile:
|
|
|
210 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
211 |
except Exception as e:
|
212 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
213 |
+
|
214 |
if not answers_payload:
|
215 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
216 |
+
print(question_scorer("FINAL ANSWER: right",submitted_answer))
|
217 |
submission_data = {
|
218 |
"username": username.strip(),
|
219 |
"agent_code": agent_code,
|