Maharshi Gor committed
Commit 6d84fb2 · 1 Parent(s): 20c7e40

Code clean and workflow update

shared/workflows CHANGED
@@ -1 +1 @@
-Subproject commit 873b0e6bc80052921fa2061ef9fbcd4e1c4f057e
+Subproject commit ec5c0d9ab888cc5e8fcfde8985f2e040ed170b71
src/components/quizbowl/plotting.py CHANGED
@@ -273,64 +273,6 @@ def update_tossup_plot(highlighted_index: int, state: str) -> pd.DataFrame:
     return pd.DataFrame()
 
 
-# %%
-
-
-def create_df_entry(run_indices: list[int], run_outputs: list[dict]) -> dict:
-    """Create a dataframe entry from a list of model outputs."""
-    chosen_idx = None
-    earliest_ok_idx = None
-    is_correct = None
-    for i, o in enumerate(run_outputs):
-        if chosen_idx is None and o["buzz"]:
-            chosen_idx = run_indices[o["position"] - 1] + 1
-            is_correct = o["score"]
-        if earliest_ok_idx is None and o["score"]:
-            earliest_ok_idx = run_indices[o["position"] - 1] + 1
-    if is_correct is None:
-        is_correct = False
-
-    # if buzz is not the last index, correct scores 10, incorrect scores -5
-    # if buzz is the final index, correct scores 5, incorrect scores 0
-
-    if chosen_idx == -1:
-        tossup_score = 0
-    elif chosen_idx == run_indices[-1] + 1:
-        tossup_score = 5 if is_correct else 0
-    else:
-        tossup_score = 10 if is_correct else -5
-
-    gap = None if (chosen_idx is None or earliest_ok_idx is None) else chosen_idx - earliest_ok_idx
-    if earliest_ok_idx is None:
-        cls = "hopeless"
-    elif chosen_idx is None:
-        cls = "never-buzzed"  # Opportunity missed to score
-    elif chosen_idx == earliest_ok_idx:
-        cls = "best-buzz"  # Perfect timing
-    elif chosen_idx > earliest_ok_idx:
-        cls = "late-buzz"  # Opportunity missed to buzz earlier
-    elif chosen_idx < earliest_ok_idx:
-        cls = "premature"  # Opportunity missed to score
-
-    return {
-        "chosen_idx": chosen_idx,
-        "earliest_ok_idx": earliest_ok_idx,
-        "gap": gap,
-        "cls": cls,
-        "tossup_score": tossup_score,
-        "is_correct": int(is_correct),
-    }
-
-
-def prepare_tossup_results_df(run_indices: list[list[int]], model_outputs: list[list[dict]]) -> pd.DataFrame:
-    """Create a dataframe from a list of model outputs."""
-    records = []
-    for indices, outputs in zip(run_indices, model_outputs):
-        entry = create_df_entry(indices, outputs)
-        records.append(entry)
-    return pd.DataFrame.from_records(records)
-
-
 def create_tossup_eval_table(df: pd.DataFrame) -> pd.DataFrame:
     """Create a table from a dataframe."""
     # Prepare a dataframe of aggregated metrics:
 
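Note: per the new import in tossup.py below, these helpers appear to have moved into the shared `shared.workflows.metrics.qb_metrics` module rather than being deleted outright. For reference, a minimal sketch of the buzz-scoring rules the removed `create_df_entry` encoded, using hypothetical sample data:

    # One question, three evaluation runs at token positions 10, 25, 40 (hypothetical).
    run_indices = [10, 25, 40]
    run_outputs = [
        {"buzz": False, "score": 0, "position": 1},  # no buzz yet
        {"buzz": True, "score": 1, "position": 2},   # first buzz, and it is correct
        {"buzz": True, "score": 1, "position": 3},
    ]
    # chosen_idx      = run_indices[2 - 1] + 1 = 26  (position of the first buzz)
    # earliest_ok_idx = run_indices[2 - 1] + 1 = 26  (position of the first correct run)
    # Correct and not at the final index (41): tossup_score = 10, cls = "best-buzz"
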
src/components/quizbowl/tossup.py CHANGED
@@ -14,6 +14,7 @@ from components.typed_dicts import TossupInterfaceDefaults, TossupPipelineStateD
 from display.formatting import styled_error
 from shared.workflows import factory
 from shared.workflows.metrics import evaluate_prediction
+from shared.workflows.metrics.qb_metrics import prepare_tossup_results_df
 from shared.workflows.qb_agents import QuizBowlTossupAgent, TossupResult
 from submission import submit
 
@@ -23,7 +24,6 @@ from .plotting import (
     create_tossup_eval_dashboard,
     create_tossup_eval_table,
     create_tossup_html,
-    prepare_tossup_results_df,
 )
 from .utils import create_error_message
 from .validation import UserInputWorkflowValidator
@@ -336,7 +336,7 @@ class TossupInterface:
         for example in progress.tqdm(self.ds, desc="Evaluating tossup questions"):
            run_outputs = self.get_agent_outputs(example, pipeline_state, early_stop=True)
            model_outputs.append(run_outputs)
-        eval_df = prepare_tossup_results_df(self.ds["run_indices"], model_outputs)
+        eval_df = prepare_tossup_results_df(model_outputs, self.ds["run_indices"])
         plot_data = create_tossup_eval_dashboard(self.ds["run_indices"], eval_df)
         output_df = create_tossup_eval_table(eval_df)
         return (
  return (