cptu_bench / data.json
djstrong's picture
Update data.json
3f5a790 verified
raw
history blame contribute delete
17.3 kB
[
{
"Model": "mistralai/Mistral-Large-Instruct-2407",
"Params": "123B",
"Sentiment": 4.230769230769231,
"Language understanding": 4.0,
"Phraseology": 3.86,
"Tricky questions": 3.646067415730337
},
{
"Model": "mistralai/Mistral-Large-Instruct-2411",
"Params": "123B",
"Sentiment": 4.326923076923077,
"Language understanding": 3.975,
"Phraseology": 3.99,
"Tricky questions": 3.7247191011235956
},
{
"Model": "alpindale/WizardLM-2-8x22B (API)",
"Params": "141B",
"Sentiment": 3.7051282051282053,
"Language understanding": 3.815,
"Phraseology": 4.22,
"Tricky questions": 3.056179775280899
},
{
"Model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"Params": "70.6B",
"Sentiment": 4.326923076923077,
"Language understanding": 3.91,
"Phraseology": 3.25,
"Tricky questions": 3.0112359550561796
},
{
"Model": "meta-llama/Meta-Llama-3-70B-Instruct",
"Params": "70.6B",
"Sentiment": 4.134615384615385,
"Language understanding": 3.82,
"Phraseology": 3.465,
"Tricky questions": 3.707865168539326
},
{
"Model": "speakleash/Bielik-11B-v2.3-Instruct",
"Params": "11.2B",
"Sentiment": 3.9743589743589745,
"Language understanding": 3.785,
"Phraseology": 3.55,
"Tricky questions": 3.2191011235955056
},
{
"Model": "mistralai/Mixtral-8x22B-Instruct-v0.1 (API)",
"Params": "141B",
"Sentiment": 3.782051282051282,
"Language understanding": 3.675,
"Phraseology": 3.55,
"Tricky questions": 3.235955056179775
},
{
"Model": "speakleash/Bielik-11B-v2.1-Instruct",
"Params": "11.2B",
"Sentiment": 3.9551282051282053,
"Language understanding": 3.915,
"Phraseology": 3.105,
"Tricky questions": 3.4719101123595504
},
{
"Model": "Qwen/Qwen2-72B-Instruct",
"Params": "72.7B",
"Sentiment": 3.7628205128205128,
"Language understanding": 3.89,
"Phraseology": 3.28,
"Tricky questions": 3.6797752808988764
},
{
"Model": "speakleash/Bielik-11B-v2.0-Instruct",
"Params": "11.2B",
"Sentiment": 3.9743589743589745,
"Language understanding": 3.745,
"Phraseology": 3.125,
"Tricky questions": 2.196629213483146
},
{
"Model": "speakleash/Bielik-11B-v2.2-Instruct",
"Params": "11.2B",
"Sentiment": 3.717948717948718,
"Language understanding": 3.73,
"Phraseology": 3.25,
"Tricky questions": 3.1235955056179776
},
{
"Model": "Qwen/Qwen1.5-72B-Chat",
"Params": "72.3B",
"Sentiment": 3.4743589743589745,
"Language understanding": 3.515,
"Phraseology": 2.975,
"Tricky questions": 2.668539325842697
},
{
"Model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"Params": "8.03B",
"Sentiment": 3.9743589743589745,
"Language understanding": 3.38,
"Phraseology": 2.58,
"Tricky questions": 2.1123595505617976
},
{
"Model": "THUDM/glm-4-9b-chat",
"Params": "9.4B",
"Sentiment": 3.58974358974359,
"Language understanding": 3.455,
"Phraseology": 2.78,
"Tricky questions": 1.9831460674157304
},
{
"Model": "mistralai/Mistral-Nemo-Instruct-2407",
"Params": "12.2B",
"Sentiment": 3.641025641025641,
"Language understanding": 3.29,
"Phraseology": 2.74,
"Tricky questions": 2.0898876404494384
},
{
"Model": "meta-llama/Meta-Llama-3-8B-Instruct",
"Params": "8.03B",
"Sentiment": 3.3333333333333335,
"Language understanding": 3.15,
"Phraseology": 3.035,
"Tricky questions": 2.4775280898876404
},
{
"Model": "upstage/SOLAR-10.7B-Instruct-v1.0",
"Params": "10.7B",
"Sentiment": 2.967948717948718,
"Language understanding": 3.18,
"Phraseology": 3.255,
"Tricky questions": 2.1235955056179776
},
{
"Model": "speakleash/Bielik-7B-Instruct-v0.1",
"Params": "7.24B",
"Sentiment": 3.58974358974359,
"Language understanding": 3.475,
"Phraseology": 2.315,
"Tricky questions": 2.157303370786517
},
{
"Model": "openchat/openchat-3.5-0106-gemma",
"Params": "8.54B",
"Sentiment": 3.730769230769231,
"Language understanding": 3.08,
"Phraseology": 2.445,
"Tricky questions": 1.6797752808988764
},
{
"Model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"Params": "46.7B",
"Sentiment": 3.0576923076923075,
"Language understanding": 3.175,
"Phraseology": 2.885,
"Tricky questions": 1.797752808988764
},
{
"Model": "mistralai/Mistral-7B-Instruct-v0.3",
"Params": "7.25B",
"Sentiment": 3.326923076923077,
"Language understanding": 3.06,
"Phraseology": 2.68,
"Tricky questions": 1.9887640449438202
},
{
"Model": "berkeley-nest/Starling-LM-7B-alpha",
"Params": "7.24B",
"Sentiment": 3.0576923076923075,
"Language understanding": 2.925,
"Phraseology": 2.855,
"Tricky questions": 1.6797752808988764
},
{
"Model": "openchat/openchat-3.5-0106",
"Params": "7.24B",
"Sentiment": 3.16025641025641,
"Language understanding": 2.835,
"Phraseology": 2.555,
"Tricky questions": 1.9606741573033708
},
{
"Model": "internlm/internlm2-chat-20b",
"Params": "19.9B",
"Sentiment": 3.301282051282051,
"Language understanding": 2.785,
"Phraseology": 2.385,
"Tricky questions": 0.12359550561797752
},
{
"Model": "01-ai/Yi-1.5-34B-Chat",
"Params": "34.4B",
"Sentiment": 3.076923076923077,
"Language understanding": 2.87,
"Phraseology": 2.38,
"Tricky questions": 1.0
},
{
"Model": "Voicelab/trurl-2-13b-academic",
"Params": "13B",
"Sentiment": 3.301282051282051,
"Language understanding": 2.755,
"Phraseology": 2.165,
"Tricky questions": 1.0168539325842696
},
{
"Model": "google/gemma-2-2b-it",
"Params": "2.61B",
"Sentiment": 3.3974358974359,
"Language understanding": 2.9,
"Phraseology": 2.095,
"Tricky questions": 2.2134831460674156
},
{
"Model": "Qwen/Qwen2.5-3B-Instruct",
"Params": "3.09B",
"Sentiment": 2.948717948717949,
"Language understanding": 2.455,
"Phraseology": 2.8,
"Tricky questions": 1.8089887640449438
},
{
"Model": "NousResearch/Hermes-3-Llama-3.2-3B",
"Params": "3.21B",
"Sentiment": 2.6153846153846154,
"Language understanding": 2.705,
"Phraseology": 2.765,
"Tricky questions": 1.1404494382022472
},
{
"Model": "ibm-granite/granite-3.1-2b-instruct",
"Params": "2.53B",
"Sentiment": 3.076923076923077,
"Language understanding": 2.235,
"Phraseology": 1.88,
"Tricky questions": 0.5898876404494382
},
{
"Model": "meta-llama/Llama-3.2-1B-Instruct",
"Params": "1.24B",
"Sentiment": 3.076923076923077,
"Language understanding": 1.735,
"Phraseology": 2.34,
"Tricky questions": 0.5224719101123596
},
{
"Model": "microsoft/Phi-3.5-mini-instruct",
"Params": "3.82B",
"Sentiment": 2.435897435897436,
"Language understanding": 2.135,
"Phraseology": 2.425,
"Tricky questions": 1.0449438202247192
},
{
"Model": "meta-llama/Llama-3.2-3B-Instruct",
"Params": "3.21B",
"Sentiment": 2.7564102564102564,
"Language understanding": 2.295,
"Phraseology": 1.72,
"Tricky questions": 1.2191011235955056
},
{
"Model": "h2oai/h2o-danube2-1.8b-chat",
"Params": "1.83B",
"Sentiment": 2.371794871794872,
"Language understanding": 1.595,
"Phraseology": 2.47,
"Tricky questions": 0.12921348314606743
},
{
"Model": "Qwen/Qwen2.5-1.5B-Instruct",
"Params": "1.54B",
"Sentiment": 2.7948717948717947,
"Language understanding": 1.35,
"Phraseology": 2.225,
"Tricky questions": 0.6629213483146067
},
{
"Model": "utter-project/EuroLLM-1.7B-Instruct",
"Params": "1.66B",
"Sentiment": 2.243589743589744,
"Language understanding": 1.79,
"Phraseology": 2.26,
"Tricky questions": 0.7584269662921348
},
{
"Model": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
"Params": "2.41B",
"Sentiment": 1.9423076923076923,
"Language understanding": 2.1155778894472363,
"Phraseology": 2.130653266331658,
"Tricky questions": 0.4887640449438202
},
{
"Model": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
"Params": "1.71B",
"Sentiment": 2.275641025641025,
"Language understanding": 1.1,
"Phraseology": 2.355,
"Tricky questions": 0.25280898876404495
},
{
"Model": "Qwen/Qwen2.5-0.5B-Instruct",
"Params": "0.49B",
"Sentiment": 1.955128205128205,
"Language understanding": 0.835,
"Phraseology": 2.595,
"Tricky questions": 0.21910112359550563
},
{
"Model": "CYFRAGOVPL/Llama-PLLuM-70B-chat",
"Params": "70.6B",
"Sentiment": 3.94,
"Language understanding": 3.61,
"Phraseology": 3.35,
"Tricky questions": 3.2134831460674156
},
{
"Model": "CYFRAGOVPL/PLLuM-8x7B-nc-instruct",
"Params": "46.7B",
"Sentiment": 3.88,
"Language understanding": 3.59,
"Phraseology": 3.22,
"Tricky questions": 1.7640449438202248
},
{
"Model": "CYFRAGOVPL/Llama-PLLuM-70B-instruct",
"Params": "70.6B",
"Sentiment": 3.78,
"Language understanding": 3.63,
"Phraseology": 3.26,
"Tricky questions": 2.634831460674157
},
{
"Model": "CYFRAGOVPL/PLLuM-8x7B-instruct",
"Params": "46.7B",
"Sentiment": 3.59,
"Language understanding": 3.47,
"Phraseology": 3.46,
"Tricky questions": 1.5056179775280898
},
{
"Model": "CYFRAGOVPL/PLLuM-12B-instruct",
"Params": "12.2B",
"Sentiment": 3.71,
"Language understanding": 3.17,
"Phraseology": 3.59,
"Tricky questions": 1.904494382022472
},
{
"Model": "CYFRAGOVPL/PLLuM-8x7B-nc-chat",
"Params": "46.7B",
"Sentiment": 3.76,
"Language understanding": 3.48,
"Phraseology": 3.08,
"Tricky questions": 1.797752808988764
},
{
"Model": "CYFRAGOVPL/PLLuM-8x7B-chat",
"Params": "46.7B",
"Sentiment": 3.44,
"Language understanding": 3.45,
"Phraseology": 3.35,
"Tricky questions": 1.7808988764044944
},
{
"Model": "CYFRAGOVPL/PLLuM-12B-chat",
"Params": "12.2B",
"Sentiment": 3.32,
"Language understanding": 3.21,
"Phraseology": 3.43,
"Tricky questions": 2.5898876404494384
},
{
"Model": "CYFRAGOVPL/PLLuM-12B-nc-instruct",
"Params": "12.2B",
"Sentiment": 3.24,
"Language understanding": 3.31,
"Phraseology": 3.32,
"Tricky questions": 1.9831460674157304
},
{
"Model": "CYFRAGOVPL/Llama-PLLuM-8B-instruct",
"Params": "8.03B",
"Sentiment": 3.24,
"Language understanding": 2.90,
"Phraseology": 3.46,
"Tricky questions": 1.6629213483146068
},
{
"Model": "CYFRAGOVPL/Llama-PLLuM-8B-chat",
"Params": "8.03B",
"Sentiment": 3.13,
"Language understanding": 2.93,
"Phraseology": 3.36,
"Tricky questions": 2.252808988764045
},
{
"Model": "CYFRAGOVPL/PLLuM-12B-nc-chat",
"Params": "12.2B",
"Sentiment": 3.22,
"Language understanding": 3.23,
"Phraseology": 3.54,
"Tricky questions": 2.6235955056179776
},
{
"Model": "Qwen/Qwen2.5-72B-Instruct",
"Params": "72.7B",
"Sentiment": 4.076923076923077,
"Language understanding": 3.97,
"Phraseology": 3.93,
"Tricky questions": 3.808988764044944
},
{
"Model": "Qwen/Qwen2.5-32B-Instruct",
"Params": "32.8B",
"Sentiment": 3.8141025641025643,
"Language understanding": 3.565,
"Phraseology": 4.035,
"Tricky questions": 3.5898876404494384
},
{
"Model": "mistralai/Mistral-Small-24B-Instruct-2501",
"Params": "23.6B",
"Sentiment": 3.91025641025641,
"Language understanding": 3.6,
"Phraseology": 3.875,
"Tricky questions": 3.449438202247191
},
{
"Model": "meta-llama/Llama-3.3-70B-Instruct",
"Params": "70.6B",
"Sentiment": 4.294871794871795,
"Language understanding": 3.865,
"Phraseology": 3.04,
"Tricky questions": 3.3764044943820224
},
{
"Model": "Qwen/Qwen2.5-14B-Instruct",
"Params": "14.8B",
"Sentiment": 3.91025641025641,
"Language understanding": 3.565,
"Phraseology": 3.37,
"Tricky questions": 3.337078651685393
},
{
"Model": "microsoft/phi-4",
"Params": "14.7B",
"Sentiment": 3.717948717948718,
"Language understanding": 3.54,
"Phraseology": 3.235,
"Tricky questions": 2.7247191011235956
},
{
"Model": "Qwen/Qwen2.5-7B-Instruct",
"Params": "7.62B",
"Sentiment": 3.5576923076923075,
"Language understanding": 3.025,
"Phraseology": 3.095,
"Tricky questions": 2.5842696629213484
},
{
"Model": "microsoft/Phi-4-mini-instruct",
"Params": "3.84B",
"Sentiment": 2.6923076923076925,
"Language understanding": 2.43,
"Phraseology": 2.245,
"Tricky questions": 1.303370786516854
},
{
"Model": "gemini-2.0-flash-001",
"Params": "",
"Sentiment": 4.519230769230769,
"Language understanding": 4.32,
"Phraseology": 4.34,
"Tricky questions": 3.9887640449438204
},
{
"Model": "gemini-2.0-flash-lite-001",
"Params": "",
"Sentiment": 4.230769230769231,
"Language understanding": 4.055,
"Phraseology": 4.235,
"Tricky questions": 3.853932584269663
},
{
"Model": "deepseek-ai/DeepSeek-V3 (API)",
"Params": "685B",
"Sentiment": 4.358974358974359,
"Language understanding": 4.22,
"Phraseology": 3.525,
"Tricky questions": 3.9887640449438204
},
{
"Model": "deepseek-ai/DeepSeek-R1 (API)",
"Params": "685B",
"Sentiment": 4.487179487179487,
"Language understanding": 4.345,
"Phraseology": 3.6,
"Tricky questions": 4.117977528089888
},
{
"Model": "deepseek-ai/DeepSeek-V3-0324 (API)",
"Params": "685B",
"Sentiment": 4.358974358974359,
"Language understanding": 4.195,
"Phraseology": 3.54,
"Tricky questions": 4.022471910112359
},
{
"Model": "google/gemma-3-27b-it (API)",
"Params": "27.4B",
"Sentiment": 3.878205128205128,
"Language understanding": 3.785,
"Phraseology": 4.025,
"Tricky questions": 3.533707865168539
},
{
"Model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (API)",
"Params": "402B",
"Sentiment": 4.391025641025641,
"Language understanding": 4.11,
"Phraseology": 3.475,
"Tricky questions": 3.758426966292135
},
{
"Model": "meta-llama/Llama-4-Scout-17B-16E-Instruct (API)",
"Params": "109B",
"Sentiment": 4.102564102564102,
"Language understanding": 3.805,
"Phraseology": 3.9,
"Tricky questions": 3.191011235955056
},
{
"Model": "speakleash/Bielik-11B-v2.5-Instruct",
"Params": "11.2B",
"Sentiment": 4.006410256410256,
"Language understanding": 3.86,
"Phraseology": 3.13,
"Tricky questions": 2.9101123595505616
},
{
"Model": "speakleash/Bielik-4.5B-v3.0-Instruct",
"Params": "4.8B",
"Sentiment": 3.7628205128205128,
"Language understanding": 3.61,
"Phraseology": 3.675,
"Tricky questions": 2.455056179775281
},
{
"Model": "speakleash/Bielik-1.5B-v3.0-Instruct",
"Params": "1.6B",
"Sentiment": 3.5256410256410255,
"Language understanding": 2.33,
"Phraseology": 2.38,
"Tricky questions": 1.2191011235955056
}
]