Maharshi Gor
committed on
Commit
·
6d84fb2
1
Parent(s):
20c7e40
Code clean and workflow update
Browse files
- shared/workflows +1 -1
- src/components/quizbowl/plotting.py +0 -58
- src/components/quizbowl/tossup.py +2 -2
shared/workflows
CHANGED
@@ -1 +1 @@
|
|
1 |
-
Subproject commit
|
|
|
1 |
+
Subproject commit ec5c0d9ab888cc5e8fcfde8985f2e040ed170b71
|
src/components/quizbowl/plotting.py
CHANGED
@@ -273,64 +273,6 @@ def update_tossup_plot(highlighted_index: int, state: str) -> pd.DataFrame:
|
|
273 |
return pd.DataFrame()
|
274 |
|
275 |
|
276 |
-
# %%
|
277 |
-
|
278 |
-
|
279 |
-
def create_df_entry(run_indices: list[int], run_outputs: list[dict]) -> dict:
|
280 |
-
"""Create a dataframe entry from a list of model outputs."""
|
281 |
-
chosen_idx = None
|
282 |
-
earliest_ok_idx = None
|
283 |
-
is_correct = None
|
284 |
-
for i, o in enumerate(run_outputs):
|
285 |
-
if chosen_idx is None and o["buzz"]:
|
286 |
-
chosen_idx = run_indices[o["position"] - 1] + 1
|
287 |
-
is_correct = o["score"]
|
288 |
-
if earliest_ok_idx is None and o["score"]:
|
289 |
-
earliest_ok_idx = run_indices[o["position"] - 1] + 1
|
290 |
-
if is_correct is None:
|
291 |
-
is_correct = False
|
292 |
-
|
293 |
-
# if buzz is not the last index, correct scores 10, incorrect scores -5
|
294 |
-
# if buzz is the final index, correct scores 5, incorrect scores 0
|
295 |
-
|
296 |
-
if chosen_idx == -1:
|
297 |
-
tossup_score = 0
|
298 |
-
elif chosen_idx == run_indices[-1] + 1:
|
299 |
-
tossup_score = 5 if is_correct else 0
|
300 |
-
else:
|
301 |
-
tossup_score = 10 if is_correct else -5
|
302 |
-
|
303 |
-
gap = None if (chosen_idx is None or earliest_ok_idx is None) else chosen_idx - earliest_ok_idx
|
304 |
-
if earliest_ok_idx is None:
|
305 |
-
cls = "hopeless"
|
306 |
-
elif chosen_idx is None:
|
307 |
-
cls = "never-buzzed" # Opportunity missed to score
|
308 |
-
elif chosen_idx == earliest_ok_idx:
|
309 |
-
cls = "best-buzz" # Perfect timing
|
310 |
-
elif chosen_idx > earliest_ok_idx:
|
311 |
-
cls = "late-buzz" # Opportunity missed to buzz earlier
|
312 |
-
elif chosen_idx < earliest_ok_idx:
|
313 |
-
cls = "premature" # Opportunity missed to score
|
314 |
-
|
315 |
-
return {
|
316 |
-
"chosen_idx": chosen_idx,
|
317 |
-
"earliest_ok_idx": earliest_ok_idx,
|
318 |
-
"gap": gap,
|
319 |
-
"cls": cls,
|
320 |
-
"tossup_score": tossup_score,
|
321 |
-
"is_correct": int(is_correct),
|
322 |
-
}
|
323 |
-
|
324 |
-
|
325 |
-
def prepare_tossup_results_df(run_indices: list[list[int]], model_outputs: list[list[dict]]) -> pd.DataFrame:
|
326 |
-
"""Create a dataframe from a list of model outputs."""
|
327 |
-
records = []
|
328 |
-
for indices, outputs in zip(run_indices, model_outputs):
|
329 |
-
entry = create_df_entry(indices, outputs)
|
330 |
-
records.append(entry)
|
331 |
-
return pd.DataFrame.from_records(records)
|
332 |
-
|
333 |
-
|
334 |
def create_tossup_eval_table(df: pd.DataFrame) -> pd.DataFrame:
|
335 |
"""Create a table from a dataframe."""
|
336 |
# Prepare a dataframe of aggregated metrics:
|
|
|
273 |
return pd.DataFrame()
|
274 |
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
def create_tossup_eval_table(df: pd.DataFrame) -> pd.DataFrame:
|
277 |
"""Create a table from a dataframe."""
|
278 |
# Prepare a dataframe of aggregated metrics:
|
src/components/quizbowl/tossup.py
CHANGED
@@ -14,6 +14,7 @@ from components.typed_dicts import TossupInterfaceDefaults, TossupPipelineStateD
|
|
14 |
from display.formatting import styled_error
|
15 |
from shared.workflows import factory
|
16 |
from shared.workflows.metrics import evaluate_prediction
|
|
|
17 |
from shared.workflows.qb_agents import QuizBowlTossupAgent, TossupResult
|
18 |
from submission import submit
|
19 |
|
@@ -23,7 +24,6 @@ from .plotting import (
|
|
23 |
create_tossup_eval_dashboard,
|
24 |
create_tossup_eval_table,
|
25 |
create_tossup_html,
|
26 |
-
prepare_tossup_results_df,
|
27 |
)
|
28 |
from .utils import create_error_message
|
29 |
from .validation import UserInputWorkflowValidator
|
@@ -336,7 +336,7 @@ class TossupInterface:
|
|
336 |
for example in progress.tqdm(self.ds, desc="Evaluating tossup questions"):
|
337 |
run_outputs = self.get_agent_outputs(example, pipeline_state, early_stop=True)
|
338 |
model_outputs.append(run_outputs)
|
339 |
-
eval_df = prepare_tossup_results_df(self.ds["run_indices"], model_outputs)
|
340 |
plot_data = create_tossup_eval_dashboard(self.ds["run_indices"], eval_df)
|
341 |
output_df = create_tossup_eval_table(eval_df)
|
342 |
return (
|
|
|
14 |
from display.formatting import styled_error
|
15 |
from shared.workflows import factory
|
16 |
from shared.workflows.metrics import evaluate_prediction
|
17 |
+
from shared.workflows.metrics.qb_metrics import prepare_tossup_results_df
|
18 |
from shared.workflows.qb_agents import QuizBowlTossupAgent, TossupResult
|
19 |
from submission import submit
|
20 |
|
|
|
24 |
create_tossup_eval_dashboard,
|
25 |
create_tossup_eval_table,
|
26 |
create_tossup_html,
|
|
|
27 |
)
|
28 |
from .utils import create_error_message
|
29 |
from .validation import UserInputWorkflowValidator
|
|
|
336 |
for example in progress.tqdm(self.ds, desc="Evaluating tossup questions"):
|
337 |
run_outputs = self.get_agent_outputs(example, pipeline_state, early_stop=True)
|
338 |
model_outputs.append(run_outputs)
|
339 |
+
eval_df = prepare_tossup_results_df(model_outputs, self.ds["run_indices"])
|
340 |
plot_data = create_tossup_eval_dashboard(self.ds["run_indices"], eval_df)
|
341 |
output_df = create_tossup_eval_table(eval_df)
|
342 |
return (
|