Upload 2 files
Browse files- elo_results_20250226.pkl +3 -0
- leaderboard_table_20250226.csv +57 -0
elo_results_20250226.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f31ba2cf1bc2d0f02fbad254c601e95520a3a4a4050737f73a4575203c1d7546
|
3 |
+
size 1552548
|
leaderboard_table_20250226.csv
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
|
2 |
+
1013.1016702417566,77.57118241390364,1030.324807293176,996.6031213695201,1050,13,DeepSeek R1,DeepSeek R1,Open Source,DeepSeek,-,https://huggingface.co/deepseek-ai/DeepSeek-R1,-,-
|
3 |
+
1122.855538322798,460.6393475815251,1163.943575663735,1081.5203901104194,215,1,gpt-4o-2024-11-20,gpt-4o-2024-11-20,Proprietary,OpenAI,11-2024,https://openai.com/api/,70.0,50.0
|
4 |
+
1091.2705988476105,196.57441213399215,1118.465102627979,1063.5161186573273,449,1,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
|
5 |
+
1030.9423203384827,58.74679594176696,1045.9582412976408,1016.0006606641286,1605,9,claude-3-5-sonnet-20241022,claude-3-5-sonnet-20241022,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
6 |
+
897.8187335571977,76.39509249229455,914.5359561696358,880.1383904199332,1012,51,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
7 |
+
969.0106971413104,200.85745244171542,995.1977295217965,940.5309822841064,369,23,DeepSeek V3,DeepSeek V3,Open Source,DeepSeek,-,https://github.com/deepseek-ai/DeepSeek-V3,-,-
|
8 |
+
967.8505452798006,75.21491383212332,983.8146799392994,951.0332430929893,1155,26,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,05-2024,https://openai.com/api/,70.0,50.0
|
9 |
+
932.9660490023364,99.40606338067018,951.9635374220687,914.0237874962963,895,36,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
10 |
+
1043.033998213752,66.89086713321484,1058.0811032334946,1025.5903119174654,1244,8,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
|
11 |
+
885.1452300250668,78.3123130022037,901.9194667399978,867.5204721724493,1014,53,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
12 |
+
958.5731523831222,82.55675815379738,975.224737426598,940.7346354898067,981,29,claude-3-5-haiku-20241022,claude-3-5-haiku-20241022,Proprietary,Anthropic,10-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
13 |
+
926.4297506834722,89.11689458478128,945.1123791138717,907.3767590759888,998,38,Llama 3.3 70B Instruct,Llama 3.3 70B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
|
14 |
+
989.4069671224117,238.63552979842657,1019.094983984059,956.8548803384201,319,17,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
|
15 |
+
993.7138901231579,86.30768246976498,1011.5012893680381,975.712513727761,926,18,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
16 |
+
946.4237242012255,94.57664726797736,965.7175615882162,928.3437444231776,932,32,GigaChat-Max-preview 4.0.26.20,GigaChat-Max-preview 4.0.26.20,Proprietary,Sber,In training,https://www.sber-bank.by/new/gigachat-29102024,-,-
|
17 |
+
1054.8355720685322,65.17032220450184,1069.9673476179403,1039.5316397262143,1206,7,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
|
18 |
+
1120.1171495751892,80.81737813477498,1138.3128365559478,1103.3602217112575,1175,1,MiniMax: MiniMax-Text-01,MiniMax: MiniMax-Text-01,Open Source,MiniMaxAI,-,https://huggingface.co/MiniMaxAI/MiniMax-Text-01,-,-
|
19 |
+
995.186556486352,57.454369511528235,1010.2342124660305,980.7061096049171,1396,18,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
|
20 |
+
935.1690055950005,49.509353748743656,948.6947386770961,921.3765172313578,1498,37,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
21 |
+
1052.1140971944926,48.23183235420028,1065.1122937107848,1038.3964444871037,1643,7,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
|
22 |
+
1018.0046152299905,53.97917746445153,1033.1193873883208,1004.2703219105658,1637,11,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
|
23 |
+
938.9306447005312,48.77055842693372,953.5875117799823,925.6232357825098,1732,36,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
|
24 |
+
946.0943600604061,70.45274906008729,962.6054451432493,930.4988696503143,1113,33,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
25 |
+
1049.796735688152,128.47648441580293,1071.220817018019,1026.8970643578386,625,6,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://llama.meta.com/llama3/,-,-
|
26 |
+
890.4839419264598,240.48758230877255,920.2531094627387,859.6206779702865,326,50,Mistral Small 3,Mistral Small 3,Open Source,Mistral,-,https://mistral.ai/news/mistral-small-3,-,-
|
27 |
+
963.115990828822,197.26189848393113,992.0048239831293,936.2818996079781,395,24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
|
28 |
+
1037.1363560476157,252.29597294156048,1068.0165254706908,1005.4462485277223,326,7,t-tech/T-pro-it-1.0,t-tech/T-pro-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-pro-it-1.0,-,-
|
29 |
+
1002.4208309779887,329.9189962642085,1036.9873131172806,965.6492705031914,202,11,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
|
30 |
+
1006.7058536057226,54.228904275541886,1020.5864094313596,991.9297770810076,1466,17,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,-,-
|
31 |
+
1045.984484966619,76.06381144808131,1062.569807655999,1028.8954171676291,1076,8,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
|
32 |
+
992.7144461504178,111.2862922501068,1014.3226482738247,972.7272833327368,716,18,GigaChat-Pro 4.0.26.20,GigaChat-Pro 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
33 |
+
997.0684125674067,307.6829135977552,1029.4464636974558,962.0608449890794,231,13,Qwen2.5 Coder 32B Instruct,Qwen2.5 Coder 32B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct,-,-
|
34 |
+
944.8547554677966,98.08800304395875,963.1607693213294,925.4522944511614,758,33,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
35 |
+
941.0096592322333,71.95836711070318,956.5654070547027,923.6396339301089,1295,36,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
|
36 |
+
1001.2322837163971,114.06596894585029,1020.4920915297746,979.5011203087803,718,17,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
|
37 |
+
937.3875915173693,122.63373426089663,959.0911308632177,915.2429884146027,718,35,t-tech/T-lite-it-1.0,t-tech/T-lite-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-lite-it-1.0,-,-
|
38 |
+
941.9497832548282,46.17648322021215,956.0771146511121,928.9856349431067,1797,36,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
39 |
+
979.8673684141697,49.249425728192264,992.2955103657698,966.6934261247505,1785,24,GigaChat 4.0.26.20,GigaChat 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
40 |
+
997.9817333431032,41.16614962897935,1010.223631154852,985.3448463806743,1845,18,Microsoft: Phi 4,Microsoft: Phi 4,Open Source,Microsoft,-,https://huggingface.co/microsoft/phi-4,-,-
|
41 |
+
1012.4334517556845,59.05250513252995,1027.2303146727984,997.6557840525941,1191,15,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
42 |
+
1052.0994173380902,113.99341514992348,1074.6814942030499,1032.8277636677678,694,6,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
43 |
+
1092.0678562249927,57.72218828577788,1106.614749310773,1077.3878109245686,1508,1,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
|
44 |
+
1115.098142968197,115.01399518347422,1136.8943479032355,1094.7177129371664,781,1,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
45 |
+
1014.2276347780177,57.43914722803243,1028.3938992210951,999.8580952768083,1447,15,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
|
46 |
+
980.4033934263836,42.00301644072328,992.2097281891993,967.0047327599075,1833,24,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
|
47 |
+
1043.2164530867724,46.166866073405174,1056.5078166593848,1030.5267558498936,1922,8,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
48 |
+
1063.0132541033074,42.983678438319046,1076.3742792528433,1049.6912247570526,1954,6,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
|
49 |
+
1084.795267999312,45.70485204065852,1097.688654176263,1071.2188370972312,1982,3,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
50 |
+
1121.550966586339,133.68676693471215,1145.0918433305187,1099.456721721364,674,1,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
|
51 |
+
1042.077544753635,48.25482883586117,1055.6892682931998,1028.4240854409798,1718,8,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
|
52 |
+
1015.9476894119709,110.66119295051767,1035.9000435560233,994.9201291895898,697,11,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
53 |
+
913.2071010664333,259.1829003119546,946.9295598443007,883.511150100826,273,37,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
54 |
+
960.4856729980762,50.81954451747418,973.7977255105847,946.0126179636994,1413,29,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
55 |
+
954.1431418324074,147.31617467082418,978.1875996270576,931.5262692712564,517,28,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
56 |
+
975.8486660749547,267.4062823777547,1007.436395323405,942.9704775636646,285,18,MTSAIR/Cotype-Nano,MTSAIR/Cotype-Nano,Open Source,MTSAIR,-,https://huggingface.co/MTSAIR/Cotype-Nano,-,-
|
57 |
+
999.5403950952939,334.08405994546473,1034.2068225585283,960.1713111506967,208,11,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|