LLMArena commited on
Commit
2f93ee6
·
verified ·
1 Parent(s): c565e52

Upload 2 files

Browse files
elo_results_20250226.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f31ba2cf1bc2d0f02fbad254c601e95520a3a4a4050737f73a4575203c1d7546
3
+ size 1552548
leaderboard_table_20250226.csv ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
2
+ 1013.1016702417566,77.57118241390364,1030.324807293176,996.6031213695201,1050,13,DeepSeek R1,DeepSeek R1,Open Source,DeepSeek,-,https://huggingface.co/deepseek-ai/DeepSeek-R1,-,-
3
+ 1122.855538322798,460.6393475815251,1163.943575663735,1081.5203901104194,215,1,gpt-4o-2024-11-20,gpt-4o-2024-11-20,Proprietary,OpenAI,11-2024,https://openai.com/api/,70.0,50.0
4
+ 1091.2705988476105,196.57441213399215,1118.465102627979,1063.5161186573273,449,1,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
5
+ 1030.9423203384827,58.74679594176696,1045.9582412976408,1016.0006606641286,1605,9,claude-3-5-sonnet-20241022,claude-3-5-sonnet-20241022,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
6
+ 897.8187335571977,76.39509249229455,914.5359561696358,880.1383904199332,1012,51,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
7
+ 969.0106971413104,200.85745244171542,995.1977295217965,940.5309822841064,369,23,DeepSeek V3,DeepSeek V3,Open Source,DeepSeek,-,https://github.com/deepseek-ai/DeepSeek-V3,-,-
8
+ 967.8505452798006,75.21491383212332,983.8146799392994,951.0332430929893,1155,26,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,05-2024,https://openai.com/api/,70.0,50.0
9
+ 932.9660490023364,99.40606338067018,951.9635374220687,914.0237874962963,895,36,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
10
+ 1043.033998213752,66.89086713321484,1058.0811032334946,1025.5903119174654,1244,8,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
11
+ 885.1452300250668,78.3123130022037,901.9194667399978,867.5204721724493,1014,53,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
12
+ 958.5731523831222,82.55675815379738,975.224737426598,940.7346354898067,981,29,claude-3-5-haiku-20241022,claude-3-5-haiku-20241022,Proprietary,Anthropic,10-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
13
+ 926.4297506834722,89.11689458478128,945.1123791138717,907.3767590759888,998,38,Llama 3.3 70B Instruct,Llama 3.3 70B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
14
+ 989.4069671224117,238.63552979842657,1019.094983984059,956.8548803384201,319,17,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
15
+ 993.7138901231579,86.30768246976498,1011.5012893680381,975.712513727761,926,18,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
16
+ 946.4237242012255,94.57664726797736,965.7175615882162,928.3437444231776,932,32,GigaChat-Max-preview 4.0.26.20,GigaChat-Max-preview 4.0.26.20,Proprietary,Sber,In training,https://www.sber-bank.by/new/gigachat-29102024,-,-
17
+ 1054.8355720685322,65.17032220450184,1069.9673476179403,1039.5316397262143,1206,7,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
18
+ 1120.1171495751892,80.81737813477498,1138.3128365559478,1103.3602217112575,1175,1,MiniMax: MiniMax-Text-01,MiniMax: MiniMax-Text-01,Open Source,MiniMaxAI,-,https://huggingface.co/MiniMaxAI/MiniMax-Text-01,-,-
19
+ 995.186556486352,57.454369511528235,1010.2342124660305,980.7061096049171,1396,18,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
20
+ 935.1690055950005,49.509353748743656,948.6947386770961,921.3765172313578,1498,37,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
21
+ 1052.1140971944926,48.23183235420028,1065.1122937107848,1038.3964444871037,1643,7,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
22
+ 1018.0046152299905,53.97917746445153,1033.1193873883208,1004.2703219105658,1637,11,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
23
+ 938.9306447005312,48.77055842693372,953.5875117799823,925.6232357825098,1732,36,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
24
+ 946.0943600604061,70.45274906008729,962.6054451432493,930.4988696503143,1113,33,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
25
+ 1049.796735688152,128.47648441580293,1071.220817018019,1026.8970643578386,625,6,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://llama.meta.com/llama3/,-,-
26
+ 890.4839419264598,240.48758230877255,920.2531094627387,859.6206779702865,326,50,Mistral Small 3,Mistral Small 3,Open Source,Mistral,-,https://mistral.ai/news/mistral-small-3,-,-
27
+ 963.115990828822,197.26189848393113,992.0048239831293,936.2818996079781,395,24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
28
+ 1037.1363560476157,252.29597294156048,1068.0165254706908,1005.4462485277223,326,7,t-tech/T-pro-it-1.0,t-tech/T-pro-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-pro-it-1.0,-,-
29
+ 1002.4208309779887,329.9189962642085,1036.9873131172806,965.6492705031914,202,11,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
30
+ 1006.7058536057226,54.228904275541886,1020.5864094313596,991.9297770810076,1466,17,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,-,-
31
+ 1045.984484966619,76.06381144808131,1062.569807655999,1028.8954171676291,1076,8,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
32
+ 992.7144461504178,111.2862922501068,1014.3226482738247,972.7272833327368,716,18,GigaChat-Pro 4.0.26.20,GigaChat-Pro 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
33
+ 997.0684125674067,307.6829135977552,1029.4464636974558,962.0608449890794,231,13,Qwen2.5 Coder 32B Instruct,Qwen2.5 Coder 32B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct,-,-
34
+ 944.8547554677966,98.08800304395875,963.1607693213294,925.4522944511614,758,33,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
35
+ 941.0096592322333,71.95836711070318,956.5654070547027,923.6396339301089,1295,36,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
36
+ 1001.2322837163971,114.06596894585029,1020.4920915297746,979.5011203087803,718,17,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
37
+ 937.3875915173693,122.63373426089663,959.0911308632177,915.2429884146027,718,35,t-tech/T-lite-it-1.0,t-tech/T-lite-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-lite-it-1.0,-,-
38
+ 941.9497832548282,46.17648322021215,956.0771146511121,928.9856349431067,1797,36,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
39
+ 979.8673684141697,49.249425728192264,992.2955103657698,966.6934261247505,1785,24,GigaChat 4.0.26.20,GigaChat 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
40
+ 997.9817333431032,41.16614962897935,1010.223631154852,985.3448463806743,1845,18,Microsoft: Phi 4,Microsoft: Phi 4,Open Source,Microsoft,-,https://huggingface.co/microsoft/phi-4,-,-
41
+ 1012.4334517556845,59.05250513252995,1027.2303146727984,997.6557840525941,1191,15,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
42
+ 1052.0994173380902,113.99341514992348,1074.6814942030499,1032.8277636677678,694,6,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
43
+ 1092.0678562249927,57.72218828577788,1106.614749310773,1077.3878109245686,1508,1,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
44
+ 1115.098142968197,115.01399518347422,1136.8943479032355,1094.7177129371664,781,1,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
45
+ 1014.2276347780177,57.43914722803243,1028.3938992210951,999.8580952768083,1447,15,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
46
+ 980.4033934263836,42.00301644072328,992.2097281891993,967.0047327599075,1833,24,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
47
+ 1043.2164530867724,46.166866073405174,1056.5078166593848,1030.5267558498936,1922,8,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
48
+ 1063.0132541033074,42.983678438319046,1076.3742792528433,1049.6912247570526,1954,6,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
49
+ 1084.795267999312,45.70485204065852,1097.688654176263,1071.2188370972312,1982,3,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
50
+ 1121.550966586339,133.68676693471215,1145.0918433305187,1099.456721721364,674,1,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
51
+ 1042.077544753635,48.25482883586117,1055.6892682931998,1028.4240854409798,1718,8,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
52
+ 1015.9476894119709,110.66119295051767,1035.9000435560233,994.9201291895898,697,11,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
53
+ 913.2071010664333,259.1829003119546,946.9295598443007,883.511150100826,273,37,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
54
+ 960.4856729980762,50.81954451747418,973.7977255105847,946.0126179636994,1413,29,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
55
+ 954.1431418324074,147.31617467082418,978.1875996270576,931.5262692712564,517,28,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
56
+ 975.8486660749547,267.4062823777547,1007.436395323405,942.9704775636646,285,18,MTSAIR/Cotype-Nano,MTSAIR/Cotype-Nano,Open Source,MTSAIR,-,https://huggingface.co/MTSAIR/Cotype-Nano,-,-
57
+ 999.5403950952939,334.08405994546473,1034.2068225585283,960.1713111506967,208,11,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-