Spaces:
Running
Running
[ | |
{ | |
"Model": "mistralai/Mistral-Large-Instruct-2407", | |
"Params": "123B", | |
"Sentiment": 4.230769230769231, | |
"Language understanding": 4.0, | |
"Phraseology": 3.86, | |
"Tricky questions": 3.646067415730337 | |
}, | |
{ | |
"Model": "mistralai/Mistral-Large-Instruct-2411", | |
"Params": "123B", | |
"Sentiment": 4.326923076923077, | |
"Language understanding": 3.975, | |
"Phraseology": 3.99, | |
"Tricky questions": 3.7247191011235956 | |
}, | |
{ | |
"Model": "alpindale/WizardLM-2-8x22B (API)", | |
"Params": "141B", | |
"Sentiment": 3.7051282051282053, | |
"Language understanding": 3.815, | |
"Phraseology": 4.22, | |
"Tricky questions": 3.056179775280899 | |
}, | |
{ | |
"Model": "meta-llama/Meta-Llama-3.1-70B-Instruct", | |
"Params": "70.6B", | |
"Sentiment": 4.326923076923077, | |
"Language understanding": 3.91, | |
"Phraseology": 3.25, | |
"Tricky questions": 3.0112359550561796 | |
}, | |
{ | |
"Model": "meta-llama/Meta-Llama-3-70B-Instruct", | |
"Params": "70.6B", | |
"Sentiment": 4.134615384615385, | |
"Language understanding": 3.82, | |
"Phraseology": 3.465, | |
"Tricky questions": 3.707865168539326 | |
}, | |
{ | |
"Model": "speakleash/Bielik-11B-v2.3-Instruct", | |
"Params": "11.2B", | |
"Sentiment": 3.9743589743589745, | |
"Language understanding": 3.785, | |
"Phraseology": 3.55, | |
"Tricky questions": 3.2191011235955056 | |
}, | |
{ | |
"Model": "mistralai/Mixtral-8x22B-Instruct-v0.1 (API)", | |
"Params": "141B", | |
"Sentiment": 3.782051282051282, | |
"Language understanding": 3.675, | |
"Phraseology": 3.55, | |
"Tricky questions": 3.235955056179775 | |
}, | |
{ | |
"Model": "speakleash/Bielik-11B-v2.1-Instruct", | |
"Params": "11.2B", | |
"Sentiment": 3.9551282051282053, | |
"Language understanding": 3.915, | |
"Phraseology": 3.105, | |
"Tricky questions": 3.4719101123595504 | |
}, | |
{ | |
"Model": "Qwen/Qwen2-72B-Instruct", | |
"Params": "72.7B", | |
"Sentiment": 3.7628205128205128, | |
"Language understanding": 3.89, | |
"Phraseology": 3.28, | |
"Tricky questions": 3.6797752808988764 | |
}, | |
{ | |
"Model": "speakleash/Bielik-11B-v2.0-Instruct", | |
"Params": "11.2B", | |
"Sentiment": 3.9743589743589745, | |
"Language understanding": 3.745, | |
"Phraseology": 3.125, | |
"Tricky questions": 2.196629213483146 | |
}, | |
{ | |
"Model": "speakleash/Bielik-11B-v2.2-Instruct", | |
"Params": "11.2B", | |
"Sentiment": 3.717948717948718, | |
"Language understanding": 3.73, | |
"Phraseology": 3.25, | |
"Tricky questions": 3.1235955056179776 | |
}, | |
{ | |
"Model": "Qwen/Qwen1.5-72B-Chat", | |
"Params": "72.3B", | |
"Sentiment": 3.4743589743589745, | |
"Language understanding": 3.515, | |
"Phraseology": 2.975, | |
"Tricky questions": 2.668539325842697 | |
}, | |
{ | |
"Model": "meta-llama/Meta-Llama-3.1-8B-Instruct", | |
"Params": "8.03B", | |
"Sentiment": 3.9743589743589745, | |
"Language understanding": 3.38, | |
"Phraseology": 2.58, | |
"Tricky questions": 2.1123595505617976 | |
}, | |
{ | |
"Model": "THUDM/glm-4-9b-chat", | |
"Params": "9.4B", | |
"Sentiment": 3.58974358974359, | |
"Language understanding": 3.455, | |
"Phraseology": 2.78, | |
"Tricky questions": 1.9831460674157304 | |
}, | |
{ | |
"Model": "mistralai/Mistral-Nemo-Instruct-2407", | |
"Params": "12.2B", | |
"Sentiment": 3.641025641025641, | |
"Language understanding": 3.29, | |
"Phraseology": 2.74, | |
"Tricky questions": 2.0898876404494384 | |
}, | |
{ | |
"Model": "meta-llama/Meta-Llama-3-8B-Instruct", | |
"Params": "8.03B", | |
"Sentiment": 3.3333333333333335, | |
"Language understanding": 3.15, | |
"Phraseology": 3.035, | |
"Tricky questions": 2.4775280898876404 | |
}, | |
{ | |
"Model": "upstage/SOLAR-10.7B-Instruct-v1.0", | |
"Params": "10.7B", | |
"Sentiment": 2.967948717948718, | |
"Language understanding": 3.18, | |
"Phraseology": 3.255, | |
"Tricky questions": 2.1235955056179776 | |
}, | |
{ | |
"Model": "speakleash/Bielik-7B-Instruct-v0.1", | |
"Params": "7.24B", | |
"Sentiment": 3.58974358974359, | |
"Language understanding": 3.475, | |
"Phraseology": 2.315, | |
"Tricky questions": 2.157303370786517 | |
}, | |
{ | |
"Model": "openchat/openchat-3.5-0106-gemma", | |
"Params": "8.54B", | |
"Sentiment": 3.730769230769231, | |
"Language understanding": 3.08, | |
"Phraseology": 2.445, | |
"Tricky questions": 1.6797752808988764 | |
}, | |
{ | |
"Model": "mistralai/Mixtral-8x7B-Instruct-v0.1", | |
"Params": "46.7B", | |
"Sentiment": 3.0576923076923075, | |
"Language understanding": 3.175, | |
"Phraseology": 2.885, | |
"Tricky questions": 1.797752808988764 | |
}, | |
{ | |
"Model": "mistralai/Mistral-7B-Instruct-v0.3", | |
"Params": "7.25B", | |
"Sentiment": 3.326923076923077, | |
"Language understanding": 3.06, | |
"Phraseology": 2.68, | |
"Tricky questions": 1.9887640449438202 | |
}, | |
{ | |
"Model": "berkeley-nest/Starling-LM-7B-alpha", | |
"Params": "7.24B", | |
"Sentiment": 3.0576923076923075, | |
"Language understanding": 2.925, | |
"Phraseology": 2.855, | |
"Tricky questions": 1.6797752808988764 | |
}, | |
{ | |
"Model": "openchat/openchat-3.5-0106", | |
"Params": "7.24B", | |
"Sentiment": 3.16025641025641, | |
"Language understanding": 2.835, | |
"Phraseology": 2.555, | |
"Tricky questions": 1.9606741573033708 | |
}, | |
{ | |
"Model": "internlm/internlm2-chat-20b", | |
"Params": "19.9B", | |
"Sentiment": 3.301282051282051, | |
"Language understanding": 2.785, | |
"Phraseology": 2.385, | |
"Tricky questions": 0.12359550561797752 | |
}, | |
{ | |
"Model": "01-ai/Yi-1.5-34B-Chat", | |
"Params": "34.4B", | |
"Sentiment": 3.076923076923077, | |
"Language understanding": 2.87, | |
"Phraseology": 2.38, | |
"Tricky questions": 1.0 | |
}, | |
{ | |
"Model": "Voicelab/trurl-2-13b-academic", | |
"Params": "13B", | |
"Sentiment": 3.301282051282051, | |
"Language understanding": 2.755, | |
"Phraseology": 2.165, | |
"Tricky questions": 1.0168539325842696 | |
}, | |
{ | |
"Model": "google/gemma-2-2b-it", | |
"Params": "2.61B", | |
"Sentiment": 3.3974358974359, | |
"Language understanding": 2.9, | |
"Phraseology": 2.095, | |
"Tricky questions": 2.2134831460674156 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-3B-Instruct", | |
"Params": "3.09B", | |
"Sentiment": 2.948717948717949, | |
"Language understanding": 2.455, | |
"Phraseology": 2.8, | |
"Tricky questions": 1.8089887640449438 | |
}, | |
{ | |
"Model": "NousResearch/Hermes-3-Llama-3.2-3B", | |
"Params": "3.21B", | |
"Sentiment": 2.6153846153846154, | |
"Language understanding": 2.705, | |
"Phraseology": 2.765, | |
"Tricky questions": 1.1404494382022472 | |
}, | |
{ | |
"Model": "ibm-granite/granite-3.1-2b-instruct", | |
"Params": "2.53B", | |
"Sentiment": 3.076923076923077, | |
"Language understanding": 2.235, | |
"Phraseology": 1.88, | |
"Tricky questions": 0.5898876404494382 | |
}, | |
{ | |
"Model": "meta-llama/Llama-3.2-1B-Instruct", | |
"Params": "1.24B", | |
"Sentiment": 3.076923076923077, | |
"Language understanding": 1.735, | |
"Phraseology": 2.34, | |
"Tricky questions": 0.5224719101123596 | |
}, | |
{ | |
"Model": "microsoft/Phi-3.5-mini-instruct", | |
"Params": "3.82B", | |
"Sentiment": 2.435897435897436, | |
"Language understanding": 2.135, | |
"Phraseology": 2.425, | |
"Tricky questions": 1.0449438202247192 | |
}, | |
{ | |
"Model": "meta-llama/Llama-3.2-3B-Instruct", | |
"Params": "3.21B", | |
"Sentiment": 2.7564102564102564, | |
"Language understanding": 2.295, | |
"Phraseology": 1.72, | |
"Tricky questions": 1.2191011235955056 | |
}, | |
{ | |
"Model": "h2oai/h2o-danube2-1.8b-chat", | |
"Params": "1.83B", | |
"Sentiment": 2.371794871794872, | |
"Language understanding": 1.595, | |
"Phraseology": 2.47, | |
"Tricky questions": 0.12921348314606743 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-1.5B-Instruct", | |
"Params": "1.54B", | |
"Sentiment": 2.7948717948717947, | |
"Language understanding": 1.35, | |
"Phraseology": 2.225, | |
"Tricky questions": 0.6629213483146067 | |
}, | |
{ | |
"Model": "utter-project/EuroLLM-1.7B-Instruct", | |
"Params": "1.66B", | |
"Sentiment": 2.243589743589744, | |
"Language understanding": 1.79, | |
"Phraseology": 2.26, | |
"Tricky questions": 0.7584269662921348 | |
}, | |
{ | |
"Model": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct", | |
"Params": "2.41B", | |
"Sentiment": 1.9423076923076923, | |
"Language understanding": 2.1155778894472363, | |
"Phraseology": 2.130653266331658, | |
"Tricky questions": 0.4887640449438202 | |
}, | |
{ | |
"Model": "HuggingFaceTB/SmolLM2-1.7B-Instruct", | |
"Params": "1.71B", | |
"Sentiment": 2.275641025641025, | |
"Language understanding": 1.1, | |
"Phraseology": 2.355, | |
"Tricky questions": 0.25280898876404495 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-0.5B-Instruct", | |
"Params": "0.49B", | |
"Sentiment": 1.955128205128205, | |
"Language understanding": 0.835, | |
"Phraseology": 2.595, | |
"Tricky questions": 0.21910112359550563 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/Llama-PLLuM-70B-chat", | |
"Params": "70.6B", | |
"Sentiment": 3.94, | |
"Language understanding": 3.61, | |
"Phraseology": 3.35, | |
"Tricky questions": 3.2134831460674156 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-8x7B-nc-instruct", | |
"Params": "46.7B", | |
"Sentiment": 3.88, | |
"Language understanding": 3.59, | |
"Phraseology": 3.22, | |
"Tricky questions": 1.7640449438202248 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/Llama-PLLuM-70B-instruct", | |
"Params": "70.6B", | |
"Sentiment": 3.78, | |
"Language understanding": 3.63, | |
"Phraseology": 3.26, | |
"Tricky questions": 2.634831460674157 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-8x7B-instruct", | |
"Params": "46.7B", | |
"Sentiment": 3.59, | |
"Language understanding": 3.47, | |
"Phraseology": 3.46, | |
"Tricky questions": 1.5056179775280898 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-12B-instruct", | |
"Params": "12.2B", | |
"Sentiment": 3.71, | |
"Language understanding": 3.17, | |
"Phraseology": 3.59, | |
"Tricky questions": 1.904494382022472 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-8x7B-nc-chat", | |
"Params": "46.7B", | |
"Sentiment": 3.76, | |
"Language understanding": 3.48, | |
"Phraseology": 3.08, | |
"Tricky questions": 1.797752808988764 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-8x7B-chat", | |
"Params": "46.7B", | |
"Sentiment": 3.44, | |
"Language understanding": 3.45, | |
"Phraseology": 3.35, | |
"Tricky questions": 1.7808988764044944 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-12B-chat", | |
"Params": "12.2B", | |
"Sentiment": 3.32, | |
"Language understanding": 3.21, | |
"Phraseology": 3.43, | |
"Tricky questions": 2.5898876404494384 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-12B-nc-instruct", | |
"Params": "12.2B", | |
"Sentiment": 3.24, | |
"Language understanding": 3.31, | |
"Phraseology": 3.32, | |
"Tricky questions": 1.9831460674157304 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/Llama-PLLuM-8B-instruct", | |
"Params": "8.03B", | |
"Sentiment": 3.24, | |
"Language understanding": 2.90, | |
"Phraseology": 3.46, | |
"Tricky questions": 1.6629213483146068 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/Llama-PLLuM-8B-chat", | |
"Params": "8.03B", | |
"Sentiment": 3.13, | |
"Language understanding": 2.93, | |
"Phraseology": 3.36, | |
"Tricky questions": 2.252808988764045 | |
}, | |
{ | |
"Model": "CYFRAGOVPL/PLLuM-12B-nc-chat", | |
"Params": "12.2B", | |
"Sentiment": 3.22, | |
"Language understanding": 3.23, | |
"Phraseology": 3.54, | |
"Tricky questions": 2.6235955056179776 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-72B-Instruct", | |
"Params": "72.7B", | |
"Sentiment": 4.076923076923077, | |
"Language understanding": 3.97, | |
"Phraseology": 3.93, | |
"Tricky questions": 3.808988764044944 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-32B-Instruct", | |
"Params": "32.8B", | |
"Sentiment": 3.8141025641025643, | |
"Language understanding": 3.565, | |
"Phraseology": 4.035, | |
"Tricky questions": 3.5898876404494384 | |
}, | |
{ | |
"Model": "mistralai/Mistral-Small-24B-Instruct-2501", | |
"Params": "23.6B", | |
"Sentiment": 3.91025641025641, | |
"Language understanding": 3.6, | |
"Phraseology": 3.875, | |
"Tricky questions": 3.449438202247191 | |
}, | |
{ | |
"Model": "meta-llama/Llama-3.3-70B-Instruct", | |
"Params": "70.6B", | |
"Sentiment": 4.294871794871795, | |
"Language understanding": 3.865, | |
"Phraseology": 3.04, | |
"Tricky questions": 3.3764044943820224 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-14B-Instruct", | |
"Params": "14.8B", | |
"Sentiment": 3.91025641025641, | |
"Language understanding": 3.565, | |
"Phraseology": 3.37, | |
"Tricky questions": 3.337078651685393 | |
}, | |
{ | |
"Model": "microsoft/phi-4", | |
"Params": "14.7B", | |
"Sentiment": 3.717948717948718, | |
"Language understanding": 3.54, | |
"Phraseology": 3.235, | |
"Tricky questions": 2.7247191011235956 | |
}, | |
{ | |
"Model": "Qwen/Qwen2.5-7B-Instruct", | |
"Params": "7.62B", | |
"Sentiment": 3.5576923076923075, | |
"Language understanding": 3.025, | |
"Phraseology": 3.095, | |
"Tricky questions": 2.5842696629213484 | |
}, | |
{ | |
"Model": "microsoft/Phi-4-mini-instruct", | |
"Params": "3.84B", | |
"Sentiment": 2.6923076923076925, | |
"Language understanding": 2.43, | |
"Phraseology": 2.245, | |
"Tricky questions": 1.303370786516854 | |
}, | |
{ | |
"Model": "gemini-2.0-flash-001", | |
"Params": "", | |
"Sentiment": 4.519230769230769, | |
"Language understanding": 4.32, | |
"Phraseology": 4.34, | |
"Tricky questions": 3.9887640449438204 | |
}, | |
{ | |
"Model": "gemini-2.0-flash-lite-001", | |
"Params": "", | |
"Sentiment": 4.230769230769231, | |
"Language understanding": 4.055, | |
"Phraseology": 4.235, | |
"Tricky questions": 3.853932584269663 | |
}, | |
{ | |
"Model": "deepseek-ai/DeepSeek-V3 (API)", | |
"Params": "685B", | |
"Sentiment": 4.358974358974359, | |
"Language understanding": 4.22, | |
"Phraseology": 3.525, | |
"Tricky questions": 3.9887640449438204 | |
}, | |
{ | |
"Model": "deepseek-ai/DeepSeek-R1 (API)", | |
"Params": "685B", | |
"Sentiment": 4.487179487179487, | |
"Language understanding": 4.345, | |
"Phraseology": 3.6, | |
"Tricky questions": 4.117977528089888 | |
}, | |
{ | |
"Model": "deepseek-ai/DeepSeek-V3-0324 (API)", | |
"Params": "685B", | |
"Sentiment": 4.358974358974359, | |
"Language understanding": 4.195, | |
"Phraseology": 3.54, | |
"Tricky questions": 4.022471910112359 | |
}, | |
{ | |
"Model": "google/gemma-3-27b-it (API)", | |
"Params": "27.4B", | |
"Sentiment": 3.878205128205128, | |
"Language understanding": 3.785, | |
"Phraseology": 4.025, | |
"Tricky questions": 3.533707865168539 | |
}, | |
{ | |
"Model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (API)", | |
"Params": "402B", | |
"Sentiment": 4.391025641025641, | |
"Language understanding": 4.11, | |
"Phraseology": 3.475, | |
"Tricky questions": 3.758426966292135 | |
}, | |
{ | |
"Model": "meta-llama/Llama-4-Scout-17B-16E-Instruct (API)", | |
"Params": "109B", | |
"Sentiment": 4.102564102564102, | |
"Language understanding": 3.805, | |
"Phraseology": 3.9, | |
"Tricky questions": 3.191011235955056 | |
}, | |
{ | |
"Model": "speakleash/Bielik-11B-v2.5-Instruct", | |
"Params": "11.2B", | |
"Sentiment": 4.006410256410256, | |
"Language understanding": 3.86, | |
"Phraseology": 3.13, | |
"Tricky questions": 2.9101123595505616 | |
}, | |
{ | |
"Model": "speakleash/Bielik-4.5B-v3.0-Instruct", | |
"Params": "4.8B", | |
"Sentiment": 3.7628205128205128, | |
"Language understanding": 3.61, | |
"Phraseology": 3.675, | |
"Tricky questions": 2.455056179775281 | |
}, | |
{ | |
"Model": "speakleash/Bielik-1.5B-v3.0-Instruct", | |
"Params": "1.6B", | |
"Sentiment": 3.5256410256410255, | |
"Language understanding": 2.33, | |
"Phraseology": 2.38, | |
"Tricky questions": 1.2191011235955056 | |
} | |
] |