David Pomerenke committed on
Commit
092c06a
·
1 Parent(s): 5fa433f

Block gemini-2.5-pro-exp-03-25

Browse files
Files changed (5) hide show
  1. evals/main.py +1 -1
  2. evals/models.py +6 -4
  3. languages.json +5 -5
  4. models.json +66 -0
  5. results.json +0 -0
evals/main.py CHANGED
@@ -10,7 +10,7 @@ from tasks import tasks
10
  # ===== config =====
11
 
12
  n_sentences = 10
13
- n_languages = 40
14
  n_models = 25
15
 
16
  # ===== run evaluation and aggregate results =====
 
10
  # ===== config =====
11
 
12
  n_sentences = 10
13
+ n_languages = 20
14
  n_models = 25
15
 
16
  # ===== run evaluation and aggregate results =====
evals/models.py CHANGED
@@ -44,6 +44,10 @@ models = [
44
  "amazon/nova-micro-v1", # 0.09$
45
  ]
46
 
 
 
 
 
47
  transcription_models = [
48
  "elevenlabs/scribe_v1",
49
  "openai/whisper-large-v3",
@@ -58,7 +62,6 @@ cache = Memory(location=".cache", verbose=0).cache
58
  def get_models(date: date):
59
  return get("https://openrouter.ai/api/frontend/models").json()["data"]
60
 
61
-
62
  def get_slug(permaslug):
63
  models = get_models(date.today())
64
  slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
@@ -88,12 +91,11 @@ def get_current_popular_models(date: date):
88
  data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
89
  return [get_slug(model["model_permaslug"]) for model in data]
90
 
91
-
92
  models += [
93
- m for m in get_historical_popular_models(date.today()) if m and m not in models
94
  ][:5]
95
  models += [
96
- m for m in get_current_popular_models(date.today()) if m and m not in models
97
  ][:5]
98
 
99
 
 
44
  "amazon/nova-micro-v1", # 0.09$
45
  ]
46
 
47
+ blocklist = [
48
+ "google/gemini-2.5-pro-exp-03-25" # rate limit too low
49
+ ]
50
+
51
  transcription_models = [
52
  "elevenlabs/scribe_v1",
53
  "openai/whisper-large-v3",
 
62
  def get_models(date: date):
63
  return get("https://openrouter.ai/api/frontend/models").json()["data"]
64
 
 
65
  def get_slug(permaslug):
66
  models = get_models(date.today())
67
  slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
 
91
  data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
92
  return [get_slug(model["model_permaslug"]) for model in data]
93
 
 
94
  models += [
95
+ m for m in get_historical_popular_models(date.today()) if m and m not in models and m not in blocklist
96
  ][:5]
97
  models += [
98
+ m for m in get_current_popular_models(date.today()) if m and m not in models and m not in blocklist
99
  ][:5]
100
 
101
 
languages.json CHANGED
@@ -1027,7 +1027,7 @@
1027
  "family":"Uralic",
1028
  "flores_path":"hun_Latn",
1029
  "fleurs_tag":"hu_hu",
1030
- "commonvoice_hours":92.0,
1031
  "commonvoice_locale":"hu",
1032
  "in_benchmark":true
1033
  },
@@ -2359,7 +2359,7 @@
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
- "commonvoice_hours":4.1,
2363
  "commonvoice_locale":"ibb",
2364
  "in_benchmark":false
2365
  },
@@ -5347,7 +5347,7 @@
5347
  "family":"Atlantic-Congo",
5348
  "flores_path":null,
5349
  "fleurs_tag":null,
5350
- "commonvoice_hours":2.3,
5351
  "commonvoice_locale":"mua",
5352
  "in_benchmark":false
5353
  },
@@ -6199,7 +6199,7 @@
6199
  "family":"Atlantic-Congo",
6200
  "flores_path":null,
6201
  "fleurs_tag":null,
6202
- "commonvoice_hours":3.9,
6203
  "commonvoice_locale":"jgo",
6204
  "in_benchmark":false
6205
  },
@@ -6367,7 +6367,7 @@
6367
  "family":"Indo-European",
6368
  "flores_path":null,
6369
  "fleurs_tag":null,
6370
- "commonvoice_hours":0.6,
6371
  "commonvoice_locale":"btv",
6372
  "in_benchmark":false
6373
  },
 
1027
  "family":"Uralic",
1028
  "flores_path":"hun_Latn",
1029
  "fleurs_tag":"hu_hu",
1030
+ "commonvoice_hours":93.0,
1031
  "commonvoice_locale":"hu",
1032
  "in_benchmark":true
1033
  },
 
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
+ "commonvoice_hours":4.3,
2363
  "commonvoice_locale":"ibb",
2364
  "in_benchmark":false
2365
  },
 
5347
  "family":"Atlantic-Congo",
5348
  "flores_path":null,
5349
  "fleurs_tag":null,
5350
+ "commonvoice_hours":2.6,
5351
  "commonvoice_locale":"mua",
5352
  "in_benchmark":false
5353
  },
 
6199
  "family":"Atlantic-Congo",
6200
  "flores_path":null,
6201
  "fleurs_tag":null,
6202
+ "commonvoice_hours":4.9,
6203
  "commonvoice_locale":"jgo",
6204
  "in_benchmark":false
6205
  },
 
6367
  "family":"Indo-European",
6368
  "flores_path":null,
6369
  "fleurs_tag":null,
6370
+ "commonvoice_hours":0.8,
6371
  "commonvoice_locale":"btv",
6372
  "in_benchmark":false
6373
  },
models.json CHANGED
@@ -218,5 +218,71 @@
218
  "type":"Commercial",
219
  "license":null,
220
  "creation_date":1733356800000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  }
222
  ]
 
218
  "type":"Commercial",
219
  "license":null,
220
  "creation_date":1733356800000
221
+ },
222
+ {
223
+ "id":"google\/gemini-2.0-flash-001",
224
+ "name":"Gemini 2.0 Flash",
225
+ "provider_name":"Google",
226
+ "cost":0.4,
227
+ "hf_id":null,
228
+ "size":null,
229
+ "type":"Commercial",
230
+ "license":null,
231
+ "creation_date":1738713600000
232
+ },
233
+ {
234
+ "id":"google\/gemini-flash-1.5",
235
+ "name":"Gemini 1.5 Flash ",
236
+ "provider_name":"Google",
237
+ "cost":0.3,
238
+ "hf_id":null,
239
+ "size":null,
240
+ "type":"Commercial",
241
+ "license":null,
242
+ "creation_date":1715644800000
243
+ },
244
+ {
245
+ "id":"google\/gemini-flash-1.5-8b",
246
+ "name":"Gemini 1.5 Flash 8B",
247
+ "provider_name":"Google",
248
+ "cost":0.15,
249
+ "hf_id":null,
250
+ "size":null,
251
+ "type":"Commercial",
252
+ "license":null,
253
+ "creation_date":1727913600000
254
+ },
255
+ {
256
+ "id":"gryphe\/mythomax-l2-13b",
257
+ "name":"MythoMax 13B",
258
+ "provider_name":"MythoMax 13B",
259
+ "cost":0.07,
260
+ "hf_id":"Gryphe\/MythoMax-L2-13b",
261
+ "size":null,
262
+ "type":"Open",
263
+ "license":"Other",
264
+ "creation_date":1691625600000
265
+ },
266
+ {
267
+ "id":"microsoft\/wizardlm-2-8x22b",
268
+ "name":"WizardLM-2 8x22B",
269
+ "provider_name":"WizardLM-2 8x22B",
270
+ "cost":0.5,
271
+ "hf_id":null,
272
+ "size":null,
273
+ "type":"Commercial",
274
+ "license":null,
275
+ "creation_date":1713225600000
276
+ },
277
+ {
278
+ "id":"x-ai\/grok-3-mini-beta",
279
+ "name":"Grok 3 Mini Beta",
280
+ "provider_name":"xAI",
281
+ "cost":0.5,
282
+ "hf_id":null,
283
+ "size":null,
284
+ "type":"Commercial",
285
+ "license":null,
286
+ "creation_date":1744156800000
287
  }
288
  ]
results.json CHANGED
The diff for this file is too large to render. See raw diff