Upload 2 files
Browse files- elo_results_20250110.pkl +3 -0
- leaderboard_table_20250110.csv +48 -0
elo_results_20250110.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1ecef84d690c0208511741ddbf361353aef41bfb8d37d6fbc5fbc870430817b
|
3 |
+
size 1322939
|
leaderboard_table_20250110.csv
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
|
2 |
+
1014.601416116398,95.42761416342722,1031.8030622266706,995.5779432377947,874,10,gpt-4o-2024-11-20,gpt-4o-2024-11-20,Proprietary,OpenAI,11-2024,https://openai.com/api/,70.0,50.0
|
3 |
+
1028.6722520927017,72.66523382689148,1045.0377418560126,1013.5081851285452,1400,8,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
|
4 |
+
899.4807838076714,91.97783163653241,916.757845748061,880.9781441056263,1012,43,claude-3-5-sonnet-20241022,claude-3-5-sonnet-20241022,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
5 |
+
970.150626401485,195.83831158206294,996.7807903056374,942.594762811972,369,19,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
6 |
+
969.710030410187,102.66591954117165,988.5983223238279,952.1133467968432,966,22,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,05-2024,https://openai.com/api/,70.0,50.0
|
7 |
+
934.2900258803946,104.89328142310012,953.3870924796843,915.3988128396124,895,30,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
|
8 |
+
1043.235878306784,79.73407164821191,1059.2108196495,1025.1351332027082,1054,6,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
9 |
+
886.9408978279039,79.63225478076211,903.1020865674288,870.2938007456198,1014,45,claude-3-5-haiku-20241022,claude-3-5-haiku-20241022,Proprietary,Anthropic,10-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
|
10 |
+
958.6055807880853,112.74778662141505,979.1831063030728,939.5427046905379,789,25,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
11 |
+
928.1007722069018,95.21755810912538,947.643642557585,909.9448147084773,998,32,Llama 3.3 70B Instruct,Llama 3.3 70B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
|
12 |
+
990.4205246258342,220.38371740991056,1018.5168805376557,960.662984908135,319,14,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
|
13 |
+
999.3564382163412,130.5762647646236,1020.1892090200513,978.3167365604505,703,14,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
|
14 |
+
947.9590947257428,94.8599262001665,966.7903885058714,928.2972392427924,932,28,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
|
15 |
+
1066.0197170578479,88.05932371354297,1083.8295654612534,1049.3104535522468,1010,4,GigaChat-Max-preview 4.0.26.20,GigaChat-Max-preview 4.0.26.20,Proprietary,Sber,In training,https://www.sber-bank.by/new/gigachat-29102024,-,-
|
16 |
+
1123.4282107837078,111.14499731389556,1143.8892599439046,1103.9880031464645,976,1,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
|
17 |
+
996.334979741527,77.51476783575593,1013.1609430703313,980.7826583234983,1219,16,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
18 |
+
935.8934353009677,66.51944589587659,950.9776584426802,920.7515735431365,1296,31,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
|
19 |
+
1054.6017478567005,69.83407338865166,1068.6524733433114,1039.4575347039724,1436,6,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
|
20 |
+
1018.8682103317,66.53691825194673,1034.2535043272585,1004.2809896440541,1440,10,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
|
21 |
+
938.5519621193108,62.35210818277878,953.3306842197004,924.0607450495434,1523,30,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
22 |
+
945.5522126815398,98.83784101225605,963.4407315543721,926.172457470567,895,29,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://llama.meta.com/llama3/,-,-
|
23 |
+
1049.3601246673272,268.8539043987804,1081.4424559549138,1017.1157908470228,307,4,Qwen2.5 Coder 32B Instruct,Qwen2.5 Coder 32B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct,-,-
|
24 |
+
1011.2302039697581,72.46699106384965,1026.4778054853111,995.7922113818295,1281,13,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
|
25 |
+
1048.8091161394425,105.41727822185668,1068.056714856125,1029.997633612424,891,6,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
|
26 |
+
1007.6931712016512,183.81328786181007,1034.0147005265235,982.9945299461845,473,10,GigaChat-Pro 4.0.26.20,GigaChat-Pro 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
27 |
+
946.1489916495897,111.08273604883065,967.2329474562292,927.5264355278966,733,28,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
|
28 |
+
942.4210988929068,85.80186562428453,959.0394079675793,924.5496603826895,1229,30,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
29 |
+
1000.0186663465498,129.42639822614004,1020.1788618209613,978.3110144673684,667,14,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
|
30 |
+
937.1827699309063,117.25592851219093,957.6725401301399,916.8567565942817,678,30,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
|
31 |
+
939.1669561233266,59.35415866617894,953.7645734516894,925.8214485861427,1602,30,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
32 |
+
979.0750417391081,55.04725437484819,993.1972069476975,966.2381765215995,1582,22,GigaChat 4.0.26.20,GigaChat 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
33 |
+
1000.7548361153558,53.99793875698641,1014.00084547724,987.8092594542534,1637,15,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
34 |
+
1013.8963052446774,70.81367702887204,1028.8079969623448,999.2817794626338,1191,13,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
35 |
+
1055.140504532773,197.9206784912106,1082.9301009588107,1030.2794988684605,452,4,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
|
36 |
+
1090.1806101814977,73.66997728114842,1107.0472618600231,1075.507963009294,1302,1,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
37 |
+
1108.169440303705,147.10251595970394,1132.9238198004584,1085.9286961204143,580,1,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
|
38 |
+
1014.212531119161,82.63794283809384,1031.0197579114529,997.5360251990695,1247,10,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
|
39 |
+
983.1138694377424,57.20788672426493,996.9212321261556,969.3994236146024,1641,19,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
|
40 |
+
1047.6443206501035,56.18680103002264,1061.346192482414,1034.3110291039816,1703,6,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
|
41 |
+
1064.3972954843152,55.160747095995546,1077.3736233553723,1050.6540540576395,1774,4,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
|
42 |
+
1087.2338162958565,61.135773014779495,1100.9814397307666,1073.054030076562,1798,2,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
|
43 |
+
1125.4111365955264,218.70063727393259,1153.8308754299183,1098.001682272981,433,1,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
|
44 |
+
1044.5640783496199,61.79178368806712,1058.1597002017886,1030.3881272845933,1527,6,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
45 |
+
1016.5650448373378,112.47605180231788,1036.033328470392,995.7624212592001,697,9,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
46 |
+
916.0634872583784,272.35969122729426,951.2495327645363,886.8407115039755,273,31,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
|
47 |
+
963.9466716558762,67.63218250298247,978.2747737876501,948.6332403441685,1225,27,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|
48 |
+
954.6876322077326,158.09864327748994,979.7924175755672,930.1448825373692,517,25,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
|