Spaces:
Running
Running
Commit
·
0d4e8d1
1
Parent(s):
fced179
add
Browse files
- results/DeepSeek-R1.json +3 -1
- results/Llama-3.1-70B-Instruct.json +3 -1
- results/Llama-3.1-8B-Instruct.json +3 -1
- results/Llama-3.3-70B-Instruct.json +3 -1
- results/Mistral-7B-Instruct-v0.3.json +3 -1
- results/Mistral-Large-Instruct-2411.json +3 -1
- results/Mistral-Small-Instruct-2409.json +3 -1
- results/QwQ-32B-Preview.json +3 -1
- results/Qwen2.5-32B-Instruct.json +3 -1
- results/Qwen2.5-72B-Instruct.json +3 -1
- results/Qwen2.5-7B-Instruct.json +3 -1
- src/display/formatting.py +2 -3
- src/leaderboard/read_evals.py +12 -7
results/DeepSeek-R1.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "deepseek-ai/DeepSeek-R1"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "deepseek-ai/DeepSeek-R1",
|
4 |
+
"link": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
5 |
+
"Params": "671B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Llama-3.1-70B-Instruct.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "meta-llama/
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "meta-llama/Llama-3.1-70B-Instruct",
|
4 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
5 |
+
"Params": "70B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Llama-3.1-8B-Instruct.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "meta-llama/
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "meta-llama/Llama-3.1-8B-Instruct",
|
4 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
|
5 |
+
"Params": "8B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Llama-3.3-70B-Instruct.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "meta-llama/Llama-3.3-70B-Instruct"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "meta-llama/Llama-3.3-70B-Instruct",
|
4 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
5 |
+
"Params": "70B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Mistral-7B-Instruct-v0.3.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "mistralai/Mistral-7B-Instruct-v0.3"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.3",
|
4 |
+
"link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
|
5 |
+
"Params": "7B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Mistral-Large-Instruct-2411.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "mistralai/Mistral-Large-Instruct-2411"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "mistralai/Mistral-Large-Instruct-2411",
|
4 |
+
"link": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2411",
|
5 |
+
"Params": "123B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Mistral-Small-Instruct-2409.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "mistralai/Mistral-Small-Instruct-2409"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "mistralai/Mistral-Small-Instruct-2409",
|
4 |
+
"link": "https://huggingface.co/mistralai/Mistral-Small-Instruct-2409",
|
5 |
+
"Params": "22B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/QwQ-32B-Preview.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "Qwen/QwQ-32B-Preview"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "Qwen/QwQ-32B-Preview",
|
4 |
+
"link": "https://huggingface.co/Qwen/QwQ-32B-Preview",
|
5 |
+
"Params": "32B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Qwen2.5-32B-Instruct.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "Qwen/Qwen2.5-32B-Instruct"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "Qwen/Qwen2.5-32B-Instruct",
|
4 |
+
"link": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
|
5 |
+
"Params": "32B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Qwen2.5-72B-Instruct.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "Qwen/Qwen2.5-72B-Instruct",
|
4 |
+
"link": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
5 |
+
"Params": "72B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
results/Qwen2.5-7B-Instruct.json
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
{
|
2 |
"config": {
|
3 |
-
"model_name": "Qwen/Qwen2.5-7B-Instruct"
|
|
|
|
|
4 |
},
|
5 |
"results": {
|
6 |
"Overall": {
|
|
|
1 |
{
|
2 |
"config": {
|
3 |
+
"model_name": "Qwen/Qwen2.5-7B-Instruct",
|
4 |
+
"link": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
|
5 |
+
"Params": "7B"
|
6 |
},
|
7 |
"results": {
|
8 |
"Overall": {
|
src/display/formatting.py
CHANGED
@@ -2,9 +2,8 @@ def model_hyperlink(link, model_name):
|
|
2 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
3 |
|
4 |
|
5 |
-
def make_clickable_model(model_name,
|
6 |
-
if
|
7 |
-
link = f"https://huggingface.co/{model_name}"
|
8 |
return model_hyperlink(link, model_name)
|
9 |
else:
|
10 |
return f'<span>{model_name}</span>'
|
|
|
2 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
3 |
|
4 |
|
5 |
+
def make_clickable_model(model_name, link=''):
|
6 |
+
if link:
|
|
|
7 |
return model_hyperlink(link, model_name)
|
8 |
else:
|
9 |
return f'<span>{model_name}</span>'
|
src/leaderboard/read_evals.py
CHANGED
@@ -28,9 +28,10 @@ class EvalResult:
|
|
28 |
# architecture: str = "Unknown"
|
29 |
# license: str = "?"
|
30 |
# likes: int = 0
|
31 |
-
num_params:
|
32 |
# date: str = "" # submission date of request file
|
33 |
-
still_on_hub: bool = False
|
|
|
34 |
|
35 |
@classmethod
|
36 |
def init_from_json_file(self, json_filepath):
|
@@ -46,6 +47,8 @@ class EvalResult:
|
|
46 |
# Get model and org
|
47 |
org_and_model = config.get("model_name", config.get("model_args", None))
|
48 |
org_and_model = org_and_model.split("/", 1)
|
|
|
|
|
49 |
|
50 |
if len(org_and_model) == 1:
|
51 |
org = None
|
@@ -59,9 +62,9 @@ class EvalResult:
|
|
59 |
result_key = f"{org}_{model}"
|
60 |
full_model = "/".join(org_and_model)
|
61 |
|
62 |
-
still_on_hub, _, model_config = is_model_on_hub(
|
63 |
-
|
64 |
-
)
|
65 |
# architecture = "?"
|
66 |
# if model_config is not None:
|
67 |
# architectures = getattr(model_config, "architectures", None)
|
@@ -87,9 +90,11 @@ class EvalResult:
|
|
87 |
org=org,
|
88 |
model=model,
|
89 |
results=results,
|
|
|
|
|
90 |
# precision=precision,
|
91 |
# revision= config.get("model_sha", ""),
|
92 |
-
still_on_hub=still_on_hub,
|
93 |
# architecture=architecture
|
94 |
)
|
95 |
|
@@ -121,7 +126,7 @@ class EvalResult:
|
|
121 |
# AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
122 |
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
123 |
# AutoEvalColumn.architecture.name: self.architecture,
|
124 |
-
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.
|
125 |
# AutoEvalColumn.revision.name: self.revision,
|
126 |
AutoEvalColumn.average.name: average,
|
127 |
# AutoEvalColumn.license.name: self.license,
|
|
|
28 |
# architecture: str = "Unknown"
|
29 |
# license: str = "?"
|
30 |
# likes: int = 0
|
31 |
+
num_params: str = '-'
|
32 |
# date: str = "" # submission date of request file
|
33 |
+
# still_on_hub: bool = False
|
34 |
+
link: str = ''
|
35 |
|
36 |
@classmethod
|
37 |
def init_from_json_file(self, json_filepath):
|
|
|
47 |
# Get model and org
|
48 |
org_and_model = config.get("model_name", config.get("model_args", None))
|
49 |
org_and_model = org_and_model.split("/", 1)
|
50 |
+
link = config.get("link", '')
|
51 |
+
params = config.get("params", '-')
|
52 |
|
53 |
if len(org_and_model) == 1:
|
54 |
org = None
|
|
|
62 |
result_key = f"{org}_{model}"
|
63 |
full_model = "/".join(org_and_model)
|
64 |
|
65 |
+
# still_on_hub, _, model_config = is_model_on_hub(
|
66 |
+
# full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
67 |
+
# )
|
68 |
# architecture = "?"
|
69 |
# if model_config is not None:
|
70 |
# architectures = getattr(model_config, "architectures", None)
|
|
|
90 |
org=org,
|
91 |
model=model,
|
92 |
results=results,
|
93 |
+
link=link,
|
94 |
+
num_params=params,
|
95 |
# precision=precision,
|
96 |
# revision= config.get("model_sha", ""),
|
97 |
+
# still_on_hub=still_on_hub,
|
98 |
# architecture=architecture
|
99 |
)
|
100 |
|
|
|
126 |
# AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
127 |
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
128 |
# AutoEvalColumn.architecture.name: self.architecture,
|
129 |
+
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.link),
|
130 |
# AutoEvalColumn.revision.name: self.revision,
|
131 |
AutoEvalColumn.average.name: average,
|
132 |
# AutoEvalColumn.license.name: self.license,
|