import json
import os

import pandas as pd
from loguru import logger

from display.formatting import make_clickable_model
from display.utils_old import EvalQueueColumn


def fetch_model_results(repo_dir: str, competition_type: str, eval_split: str) -> list[dict]:
    """Load every JSON result file under `repo_dir/<competition_type>/<eval_split>`."""
    model_results = []
    dirpath = os.path.join(repo_dir, competition_type, eval_split)
    for root, _, files in os.walk(dirpath):
        # Only process leaf directories that contain exclusively JSON result files;
        # directories with mixed contents are skipped entirely.
        if len(files) == 0 or not all(f.endswith(".json") for f in files):
            continue
        for file in files:
            filepath = os.path.join(root, file)
            try:
                with open(filepath, "r") as fp:
                    result = json.load(fp)
                model_results.append(result)
            except Exception as e:
                logger.error(f"Error loading model result from {filepath}: {e}")
                continue
    return model_results
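
# A tossup result file is expected to look roughly like this (schema inferred
# from the consumers below; all concrete values are hypothetical):
#
#   {
#       "username": "alice",
#       "model_name": "qb-buzzer",
#       "metrics": {
#           "tossup_score": 7.5,
#           "buzz_accuracy": 0.62,
#           "buzz_position": 58.3,
#           "human_win_rate": 0.41,        # present only for splits with human buzz data
#           "human_win_rate_strict": 0.35
#       }
#   }
#
# Bonus result files carry "question_accuracy" and "part_accuracy" instead.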


def get_tossups_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
    """Build the tossup leaderboard, sorted by average score (descending)."""
    model_results = fetch_model_results(repo_dir, "tossup", eval_split)
    eval_results = []
    for result in model_results:
        # Resolve the identifying fields up front so the error log below never
        # references names left unbound (or stale) by a failed metrics lookup.
        username = result.get("username", "<unknown>")
        model_name = result.get("model_name", "<unknown>")
        try:
            metrics = result["metrics"]
            row = {
                "Submission": f"{username}/{model_name}",
                "Avg Score ⬆️": metrics["tossup_score"],
                "Buzz Accuracy": metrics["buzz_accuracy"],
                "Buzz Position": metrics["buzz_position"],
                # Human win rates are only computed for splits with human buzz data.
                "Win Rate w/ Humans": metrics.get("human_win_rate"),
                "Win Rate w/ Humans (Aggressive)": metrics.get("human_win_rate_strict"),
            }
            eval_results.append(row)
        except Exception as e:
            logger.error(f"Error processing model result '{username}/{model_name}': {e}")
            continue
    df = pd.DataFrame(
        eval_results,
        columns=[
            "Submission",
            "Avg Score ⬆️",
            "Buzz Accuracy",
            "Buzz Position",
            "Win Rate w/ Humans",
            "Win Rate w/ Humans (Aggressive)",
        ],
    )
    df.sort_values(by="Avg Score ⬆️", ascending=False, inplace=True)
    return df


def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
    """Build the bonus leaderboard, sorted by question accuracy (descending)."""
    model_results = fetch_model_results(repo_dir, "bonus", eval_split)
    eval_results = []
    for result in model_results:
        # Resolved up front for the same reason as in the tossup leaderboard.
        username = result.get("username", "<unknown>")
        model_name = result.get("model_name", "<unknown>")
        try:
            metrics = result["metrics"]
            row = {
                "Submission": f"{username}/{model_name}",
                "Question Accuracy": metrics["question_accuracy"],
                "Part Accuracy": metrics["part_accuracy"],
            }
            eval_results.append(row)
        except Exception as e:
            logger.error(f"Error processing model result '{username}/{model_name}': {e}")
            continue
    df = pd.DataFrame(
        eval_results,
        columns=["Submission", "Question Accuracy", "Part Accuracy"],
    )
    df.sort_values(by="Question Accuracy", ascending=False, inplace=True)
    return df


def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
    # TODO: This function is stale, but might be a good reference point for a new implementation.
    """Creates the finished, running, and pending dataframes for the requested evaluation queues."""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []
    for entry in entries:
        if entry.endswith(".json"):
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)
            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")
            all_evals.append(data)
        elif not entry.endswith(".md"):
            # This entry is a folder: collect the request files inside it. Note that
            # os.path.isfile needs the full path, not the bare filename.
            folder_path = os.path.join(save_path, entry)
            sub_entries = [
                e
                for e in os.listdir(folder_path)
                if os.path.isfile(os.path.join(folder_path, e)) and not e.startswith(".")
            ]
            for sub_entry in sub_entries:
                file_path = os.path.join(folder_path, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)
                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]
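

if __name__ == "__main__":
    # Minimal usage sketch: build both leaderboards from a local snapshot of the
    # results repo. The directory and split names below are illustrative
    # assumptions, not values defined anywhere in this module.
    repo_dir = "eval-results"  # hypothetical local checkout of the results repo
    eval_split = "tiny_eval"   # hypothetical evaluation split name

    print(get_tossups_leaderboard_df(repo_dir, eval_split).head())
    print(get_bonuses_leaderboard_df(repo_dir, eval_split).head())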