Maharshi Gor
committed on
Commit
·
b5b12d3
1
Parent(s):
54e2d5b
Leaderboard pre-sorted
Browse files
- src/populate.py +6 -2
src/populate.py
CHANGED
@@ -55,7 +55,7 @@ def get_tossups_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
55 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
56 |
continue
|
57 |
|
58 |
-
|
59 |
eval_results,
|
60 |
columns=[
|
61 |
"Submission",
|
@@ -66,6 +66,8 @@ def get_tossups_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
66 |
"Win Rate w/ Humans (Aggressive)",
|
67 |
],
|
68 |
)
|
|
|
|
|
69 |
|
70 |
|
71 |
def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
@@ -88,10 +90,12 @@ def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
88 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
89 |
continue
|
90 |
|
91 |
-
|
92 |
eval_results,
|
93 |
columns=["Submission", "Question Accuracy", "Part Accuracy"],
|
94 |
)
|
|
|
|
|
95 |
|
96 |
|
97 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
|
55 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
56 |
continue
|
57 |
|
58 |
+
df = pd.DataFrame(
|
59 |
eval_results,
|
60 |
columns=[
|
61 |
"Submission",
|
|
|
66 |
"Win Rate w/ Humans (Aggressive)",
|
67 |
],
|
68 |
)
|
69 |
+
df.sort_values(by="Avg Score ⬆️", ascending=False, inplace=True)
|
70 |
+
return df
|
71 |
|
72 |
|
73 |
def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
|
90 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
91 |
continue
|
92 |
|
93 |
+
df = pd.DataFrame(
|
94 |
eval_results,
|
95 |
columns=["Submission", "Question Accuracy", "Part Accuracy"],
|
96 |
)
|
97 |
+
df.sort_values(by="Question Accuracy", ascending=False, inplace=True)
|
98 |
+
return df
|
99 |
|
100 |
|
101 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|