Commit
·
33ce85b
1
Parent(s):
c2f28e6
Update columns
Browse files
- src/display/about.py +15 -19
- src/display/utils.py +16 -20
src/display/about.py
CHANGED
@@ -12,26 +12,22 @@ class Task:
|
|
12 |
# Init: to update with your specific keys
|
13 |
class Tasks(Enum):
|
14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
15 |
-
task0 = Task("
|
16 |
-
task1 = Task("
|
17 |
-
task2 = Task("
|
18 |
-
task3 = Task("
|
19 |
-
task4 = Task("
|
20 |
-
task5 = Task("
|
21 |
-
task6 = Task("
|
22 |
task7 = Task("ctkfacts_en", "accuracy", "ctkfacts_en")
|
23 |
-
task8 = Task("
|
24 |
-
task9 = Task("
|
25 |
-
task10 = Task("
|
26 |
-
task11 = Task("
|
27 |
-
task12 = Task("
|
28 |
-
task13 = Task("
|
29 |
-
task14 = Task("
|
30 |
-
task15 = Task("
|
31 |
-
task16 = Task("subjectivity_en", "accuracy", "subjectivity_en")
|
32 |
-
task17 = Task("truthfulqa", "accuracy", "truthfulqa")
|
33 |
-
task18 = Task("gsm8k", "accuracy", "gsm8k")
|
34 |
-
task19 = Task("squad", "accuracy", "squad")
|
35 |
|
36 |
# Your leaderboard name
|
37 |
TITLE = """<h1 align="center" id="space-title">🇨🇿 CzechBench Leaderboard</h1>"""
|
|
|
12 |
# Init: to update with your specific keys
|
13 |
class Tasks(Enum):
|
14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
15 |
+
task0 = Task("agree_cs", "accuracy", "agree_cs")
|
16 |
+
task1 = Task("anli_cs", "accuracy", "anli_cs")
|
17 |
+
task2 = Task("arc_challenge_cs", "accuracy", "arc_challenge_cs")
|
18 |
+
task3 = Task("arc_easy_cs", "accuracy", "arc_easy_cs")
|
19 |
+
task4 = Task("belebele_cs", "accuracy", "belebele_cs")
|
20 |
+
task5 = Task("ctkfacts_cs", "accuracy", "ctkfacts_cs")
|
21 |
+
task6 = Task("czechnews_cs", "accuracy", "czechnews_cs")
|
22 |
task7 = Task("ctkfacts_en", "accuracy", "ctkfacts_en")
|
23 |
+
task8 = Task("fb_comments_cs", "accuracy", "fb_comments_cs")
|
24 |
+
task9 = Task("gsm8k_cs", "accuracy", "gsm8k_cs")
|
25 |
+
task10 = Task("klokanek_cs", "accuracy", "klokanek_cs")
|
26 |
+
task11 = Task("mall_reviews_cs", "accuracy", "mall_reviews_cs")
|
27 |
+
task12 = Task("mmlu_cs", "accuracy", "mmlu_cs")
|
28 |
+
task13 = Task("sqad_cs", "accuracy", "sqad_cs")
|
29 |
+
task14 = Task("subjectivity_cs", "accuracy", "subjectivity_cs")
|
30 |
+
task15 = Task("truthfulqa_cs", "accuracy", "truthfulqa_cs")
|
|
|
|
|
|
|
|
|
31 |
|
32 |
# Your leaderboard name
|
33 |
TITLE = """<h1 align="center" id="space-title">🇨🇿 CzechBench Leaderboard</h1>"""
|
src/display/utils.py
CHANGED
@@ -48,26 +48,22 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
|
|
48 |
"""
|
49 |
|
50 |
auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
|
51 |
-
auto_eval_column_dict.append(["
|
52 |
-
auto_eval_column_dict.append(["
|
53 |
-
auto_eval_column_dict.append(["
|
54 |
-
auto_eval_column_dict.append(["
|
55 |
-
auto_eval_column_dict.append(["
|
56 |
-
auto_eval_column_dict.append(["
|
57 |
-
auto_eval_column_dict.append(["
|
58 |
-
auto_eval_column_dict.append(["
|
59 |
-
auto_eval_column_dict.append(["
|
60 |
-
auto_eval_column_dict.append(["
|
61 |
-
auto_eval_column_dict.append(["
|
62 |
-
auto_eval_column_dict.append(["
|
63 |
-
auto_eval_column_dict.append(["
|
64 |
-
auto_eval_column_dict.append(["
|
65 |
-
auto_eval_column_dict.append(["
|
66 |
-
|
67 |
-
auto_eval_column_dict.append(["subjectivity_en", ColumnContent, ColumnContent("subjectivity_en", "number", True)])
|
68 |
-
auto_eval_column_dict.append(["truthfulqa", ColumnContent, ColumnContent("truthfulqa", "number", True)])
|
69 |
-
auto_eval_column_dict.append(["gsm8k", ColumnContent, ColumnContent("gsm8k", "number", True)])
|
70 |
-
auto_eval_column_dict.append(["squad", ColumnContent, ColumnContent("squad", "number", True)])
|
71 |
|
72 |
# We use make dataclass to dynamically fill the scores from Tasks
|
73 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
48 |
"""
|
49 |
|
50 |
auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
|
51 |
+
auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
|
52 |
+
auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
|
53 |
+
auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
|
54 |
+
auto_eval_column_dict.append(["arc_easy_cs", ColumnContent, ColumnContent("arc_easy_cs", "number", True)])
|
55 |
+
auto_eval_column_dict.append(["belebele_cs", ColumnContent, ColumnContent("belebele_cs", "number", True)])
|
56 |
+
auto_eval_column_dict.append(["ctkfacts_cs", ColumnContent, ColumnContent("ctkfacts_cs", "number", True)])
|
57 |
+
auto_eval_column_dict.append(["czechnews_cs", ColumnContent, ColumnContent("czechnews_cs", "number", True)])
|
58 |
+
auto_eval_column_dict.append(["fb_comments_cs", ColumnContent, ColumnContent("fb_comments_cs", "number", True)])
|
59 |
+
auto_eval_column_dict.append(["gsm8k_cs", ColumnContent, ColumnContent("gsm8k_cs", "number", True)])
|
60 |
+
auto_eval_column_dict.append(["klokanek_cs", ColumnContent, ColumnContent("klokanek_cs", "number", True)])
|
61 |
+
auto_eval_column_dict.append(["mall_reviews_cs", ColumnContent, ColumnContent("mall_reviews_cs", "number", True)])
|
62 |
+
auto_eval_column_dict.append(["mmlu_cs", ColumnContent, ColumnContent("mmlu_cs", "number", True)])
|
63 |
+
auto_eval_column_dict.append(["sqad_cs", ColumnContent, ColumnContent("sqad_cs", "number", True)])
|
64 |
+
auto_eval_column_dict.append(["subjectivity_cs", ColumnContent, ColumnContent("subjectivity_cs", "number", True)])
|
65 |
+
auto_eval_column_dict.append(["truthfulqa_cs", ColumnContent, ColumnContent("truthfulqa_cs", "number", True)])
|
66 |
+
|
|
|
|
|
|
|
|
|
67 |
|
68 |
# We use make dataclass to dynamically fill the scores from Tasks
|
69 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|