Merge branch 'main' of hf.co:spaces/TheFinAI/open_greek_finance_llm_leaderboard into main
Browse files
backend/app/services/leaderboard.py
CHANGED
@@ -116,6 +116,11 @@ class LeaderboardService:
|
|
116 |
"value": data.get("QA Raw", 0),
|
117 |
"normalized_score": data.get("QA", 0)
|
118 |
},
|
|
|
|
|
|
|
|
|
|
|
119 |
}
|
120 |
|
121 |
features = {
|
|
|
116 |
"value": data.get("QA Raw", 0),
|
117 |
"normalized_score": data.get("QA", 0)
|
118 |
},
|
119 |
+
"fns": {
|
120 |
+
"name": "FNS",
|
121 |
+
"value": data.get("FNS Raw", 0),
|
122 |
+
"normalized_score": data.get("FNS", 0)
|
123 |
+
},
|
124 |
}
|
125 |
|
126 |
features = {
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/defaults.js
CHANGED
@@ -137,6 +137,12 @@ const COLUMNS = {
|
|
137 |
defaultVisible: true,
|
138 |
label: "QA",
|
139 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
},
|
141 |
MODEL_INFO: {
|
142 |
"metadata.co2_cost": {
|
|
|
137 |
defaultVisible: true,
|
138 |
label: "QA",
|
139 |
},
|
140 |
+
"evaluations.fns.normalized_score": {
|
141 |
+
group: "evaluation",
|
142 |
+
size: COLUMN_SIZES.BENCHMARK,
|
143 |
+
defaultVisible: true,
|
144 |
+
label: "FNS",
|
145 |
+
},
|
146 |
},
|
147 |
MODEL_INFO: {
|
148 |
"metadata.co2_cost": {
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/tooltips.js
CHANGED
@@ -48,7 +48,7 @@ export const COLUMN_TOOLTIPS = {
|
|
48 |
subItems: ["Language Understanding", "Classification"],
|
49 |
},
|
50 |
{
|
51 |
-
label: "Scoring:
|
52 |
description: "Was the correct choice selected among the options.",
|
53 |
},
|
54 |
]),
|
@@ -61,11 +61,24 @@ export const COLUMN_TOOLTIPS = {
|
|
61 |
subItems: ["Language Understanding", "Classification"],
|
62 |
},
|
63 |
{
|
64 |
-
label: "Scoring:
|
65 |
description: "Was the correct choice selected among the options.",
|
66 |
},
|
67 |
]),
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
ARCHITECTURE: createTooltipContent("Model Architecture Information:", [
|
70 |
{
|
71 |
label: "Definition",
|
|
|
48 |
subItems: ["Language Understanding", "Classification"],
|
49 |
},
|
50 |
{
|
51 |
+
label: "Scoring: ACC Norm",
|
52 |
description: "Was the correct choice selected among the options.",
|
53 |
},
|
54 |
]),
|
|
|
61 |
subItems: ["Language Understanding", "Classification"],
|
62 |
},
|
63 |
{
|
64 |
+
label: "Scoring: ACC Norm",
|
65 |
description: "Was the correct choice selected among the options.",
|
66 |
},
|
67 |
]),
|
68 |
|
69 |
+
FNS: createTooltipContent("Multilingual Financial NLP (FNS):", [
|
70 |
+
{
|
71 |
+
label: "Purpose",
|
72 |
+
description:
|
73 |
+
"Tests model's ability to summarize real-world financial annual reports",
|
74 |
+
subItems: ["Language Understanding", "Summarization"],
|
75 |
+
},
|
76 |
+
{
|
77 |
+
label: "Scoring: Rouge1",
|
78 |
+
description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.",
|
79 |
+
},
|
80 |
+
]),
|
81 |
+
|
82 |
ARCHITECTURE: createTooltipContent("Model Architecture Information:", [
|
83 |
{
|
84 |
label: "Definition",
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js
CHANGED
@@ -768,6 +768,15 @@ export const createColumns = (
|
|
768 |
"evaluations.qa.normalized_score"
|
769 |
],
|
770 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
771 |
];
|
772 |
|
773 |
const optionalColumns = [
|
|
|
768 |
"evaluations.qa.normalized_score"
|
769 |
],
|
770 |
},
|
771 |
+
{
|
772 |
+
accessorKey: "evaluations.fns.normalized_score",
|
773 |
+
header: createHeaderCell("FNS", COLUMN_TOOLTIPS.FNS),
|
774 |
+
cell: ({ row, getValue }) =>
|
775 |
+
createScoreCell(getValue, row, "evaluations.fns.normalized_score"),
|
776 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES[
|
777 |
+
"evaluations.fns.normalized_score"
|
778 |
+
],
|
779 |
+
},
|
780 |
];
|
781 |
|
782 |
const optionalColumns = [
|