David Pomerenke
committed on
Commit
·
092c06a
1
Parent(s):
5fa433f
Block gemini-2.5-pro-exp-03-25
Browse files
- evals/main.py +1 -1
- evals/models.py +6 -4
- languages.json +5 -5
- models.json +66 -0
- results.json +0 -0
evals/main.py
CHANGED
@@ -10,7 +10,7 @@ from tasks import tasks
|
|
10 |
# ===== config =====
|
11 |
|
12 |
n_sentences = 10
|
13 |
-
n_languages =
|
14 |
n_models = 25
|
15 |
|
16 |
# ===== run evaluation and aggregate results =====
|
|
|
10 |
# ===== config =====
|
11 |
|
12 |
n_sentences = 10
|
13 |
+
n_languages = 20
|
14 |
n_models = 25
|
15 |
|
16 |
# ===== run evaluation and aggregate results =====
|
evals/models.py
CHANGED
@@ -44,6 +44,10 @@ models = [
|
|
44 |
"amazon/nova-micro-v1", # 0.09$
|
45 |
]
|
46 |
|
|
|
|
|
|
|
|
|
47 |
transcription_models = [
|
48 |
"elevenlabs/scribe_v1",
|
49 |
"openai/whisper-large-v3",
|
@@ -58,7 +62,6 @@ cache = Memory(location=".cache", verbose=0).cache
|
|
58 |
def get_models(date: date):
|
59 |
return get("https://openrouter.ai/api/frontend/models").json()["data"]
|
60 |
|
61 |
-
|
62 |
def get_slug(permaslug):
|
63 |
models = get_models(date.today())
|
64 |
slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
|
@@ -88,12 +91,11 @@ def get_current_popular_models(date: date):
|
|
88 |
data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
|
89 |
return [get_slug(model["model_permaslug"]) for model in data]
|
90 |
|
91 |
-
|
92 |
models += [
|
93 |
-
m for m in get_historical_popular_models(date.today()) if m and m not in models
|
94 |
][:5]
|
95 |
models += [
|
96 |
-
m for m in get_current_popular_models(date.today()) if m and m not in models
|
97 |
][:5]
|
98 |
|
99 |
|
|
|
44 |
"amazon/nova-micro-v1", # 0.09$
|
45 |
]
|
46 |
|
47 |
+
blocklist = [
|
48 |
+
"google/gemini-2.5-pro-exp-03-25" # rate limit too low
|
49 |
+
]
|
50 |
+
|
51 |
transcription_models = [
|
52 |
"elevenlabs/scribe_v1",
|
53 |
"openai/whisper-large-v3",
|
|
|
62 |
def get_models(date: date):
|
63 |
return get("https://openrouter.ai/api/frontend/models").json()["data"]
|
64 |
|
|
|
65 |
def get_slug(permaslug):
|
66 |
models = get_models(date.today())
|
67 |
slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
|
|
|
91 |
data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
|
92 |
return [get_slug(model["model_permaslug"]) for model in data]
|
93 |
|
|
|
94 |
models += [
|
95 |
+
m for m in get_historical_popular_models(date.today()) if m and m not in models and m not in blocklist
|
96 |
][:5]
|
97 |
models += [
|
98 |
+
m for m in get_current_popular_models(date.today()) if m and m not in models and m not in blocklist
|
99 |
][:5]
|
100 |
|
101 |
|
languages.json
CHANGED
@@ -1027,7 +1027,7 @@
|
|
1027 |
"family":"Uralic",
|
1028 |
"flores_path":"hun_Latn",
|
1029 |
"fleurs_tag":"hu_hu",
|
1030 |
-
"commonvoice_hours":
|
1031 |
"commonvoice_locale":"hu",
|
1032 |
"in_benchmark":true
|
1033 |
},
|
@@ -2359,7 +2359,7 @@
|
|
2359 |
"family":"Atlantic-Congo",
|
2360 |
"flores_path":null,
|
2361 |
"fleurs_tag":null,
|
2362 |
-
"commonvoice_hours":4.
|
2363 |
"commonvoice_locale":"ibb",
|
2364 |
"in_benchmark":false
|
2365 |
},
|
@@ -5347,7 +5347,7 @@
|
|
5347 |
"family":"Atlantic-Congo",
|
5348 |
"flores_path":null,
|
5349 |
"fleurs_tag":null,
|
5350 |
-
"commonvoice_hours":2.
|
5351 |
"commonvoice_locale":"mua",
|
5352 |
"in_benchmark":false
|
5353 |
},
|
@@ -6199,7 +6199,7 @@
|
|
6199 |
"family":"Atlantic-Congo",
|
6200 |
"flores_path":null,
|
6201 |
"fleurs_tag":null,
|
6202 |
-
"commonvoice_hours":
|
6203 |
"commonvoice_locale":"jgo",
|
6204 |
"in_benchmark":false
|
6205 |
},
|
@@ -6367,7 +6367,7 @@
|
|
6367 |
"family":"Indo-European",
|
6368 |
"flores_path":null,
|
6369 |
"fleurs_tag":null,
|
6370 |
-
"commonvoice_hours":0.
|
6371 |
"commonvoice_locale":"btv",
|
6372 |
"in_benchmark":false
|
6373 |
},
|
|
|
1027 |
"family":"Uralic",
|
1028 |
"flores_path":"hun_Latn",
|
1029 |
"fleurs_tag":"hu_hu",
|
1030 |
+
"commonvoice_hours":93.0,
|
1031 |
"commonvoice_locale":"hu",
|
1032 |
"in_benchmark":true
|
1033 |
},
|
|
|
2359 |
"family":"Atlantic-Congo",
|
2360 |
"flores_path":null,
|
2361 |
"fleurs_tag":null,
|
2362 |
+
"commonvoice_hours":4.3,
|
2363 |
"commonvoice_locale":"ibb",
|
2364 |
"in_benchmark":false
|
2365 |
},
|
|
|
5347 |
"family":"Atlantic-Congo",
|
5348 |
"flores_path":null,
|
5349 |
"fleurs_tag":null,
|
5350 |
+
"commonvoice_hours":2.6,
|
5351 |
"commonvoice_locale":"mua",
|
5352 |
"in_benchmark":false
|
5353 |
},
|
|
|
6199 |
"family":"Atlantic-Congo",
|
6200 |
"flores_path":null,
|
6201 |
"fleurs_tag":null,
|
6202 |
+
"commonvoice_hours":4.9,
|
6203 |
"commonvoice_locale":"jgo",
|
6204 |
"in_benchmark":false
|
6205 |
},
|
|
|
6367 |
"family":"Indo-European",
|
6368 |
"flores_path":null,
|
6369 |
"fleurs_tag":null,
|
6370 |
+
"commonvoice_hours":0.8,
|
6371 |
"commonvoice_locale":"btv",
|
6372 |
"in_benchmark":false
|
6373 |
},
|
models.json
CHANGED
@@ -218,5 +218,71 @@
|
|
218 |
"type":"Commercial",
|
219 |
"license":null,
|
220 |
"creation_date":1733356800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
}
|
222 |
]
|
|
|
218 |
"type":"Commercial",
|
219 |
"license":null,
|
220 |
"creation_date":1733356800000
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"id":"google\/gemini-2.0-flash-001",
|
224 |
+
"name":"Gemini 2.0 Flash",
|
225 |
+
"provider_name":"Google",
|
226 |
+
"cost":0.4,
|
227 |
+
"hf_id":null,
|
228 |
+
"size":null,
|
229 |
+
"type":"Commercial",
|
230 |
+
"license":null,
|
231 |
+
"creation_date":1738713600000
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"id":"google\/gemini-flash-1.5",
|
235 |
+
"name":"Gemini 1.5 Flash ",
|
236 |
+
"provider_name":"Google",
|
237 |
+
"cost":0.3,
|
238 |
+
"hf_id":null,
|
239 |
+
"size":null,
|
240 |
+
"type":"Commercial",
|
241 |
+
"license":null,
|
242 |
+
"creation_date":1715644800000
|
243 |
+
},
|
244 |
+
{
|
245 |
+
"id":"google\/gemini-flash-1.5-8b",
|
246 |
+
"name":"Gemini 1.5 Flash 8B",
|
247 |
+
"provider_name":"Google",
|
248 |
+
"cost":0.15,
|
249 |
+
"hf_id":null,
|
250 |
+
"size":null,
|
251 |
+
"type":"Commercial",
|
252 |
+
"license":null,
|
253 |
+
"creation_date":1727913600000
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"id":"gryphe\/mythomax-l2-13b",
|
257 |
+
"name":"MythoMax 13B",
|
258 |
+
"provider_name":"MythoMax 13B",
|
259 |
+
"cost":0.07,
|
260 |
+
"hf_id":"Gryphe\/MythoMax-L2-13b",
|
261 |
+
"size":null,
|
262 |
+
"type":"Open",
|
263 |
+
"license":"Other",
|
264 |
+
"creation_date":1691625600000
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"id":"microsoft\/wizardlm-2-8x22b",
|
268 |
+
"name":"WizardLM-2 8x22B",
|
269 |
+
"provider_name":"WizardLM-2 8x22B",
|
270 |
+
"cost":0.5,
|
271 |
+
"hf_id":null,
|
272 |
+
"size":null,
|
273 |
+
"type":"Commercial",
|
274 |
+
"license":null,
|
275 |
+
"creation_date":1713225600000
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"id":"x-ai\/grok-3-mini-beta",
|
279 |
+
"name":"Grok 3 Mini Beta",
|
280 |
+
"provider_name":"xAI",
|
281 |
+
"cost":0.5,
|
282 |
+
"hf_id":null,
|
283 |
+
"size":null,
|
284 |
+
"type":"Commercial",
|
285 |
+
"license":null,
|
286 |
+
"creation_date":1744156800000
|
287 |
}
|
288 |
]
|
results.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|