|
{ |
|
"RelicEnv": { |
|
"qwen2.5-3b-instruct": 0.18, |
|
"qwen2.5-7b-instruct": 0.396, |
|
"qwen2.5-14b-instruct": 0.8, |
|
"qwen2.5-32b-instruct": 0.8560000000000001, |
|
"qwen2.5-72b-instruct": 0.892, |
|
"llama-3.1-8b-instruct": 0.21600000000000003, |
|
"llama-3.1-70b-instruct": 0.6639999999999999, |
|
"llama-3.2-3b-instruct": 0.164, |
|
"llama-3.3-70b-instruct": 0.836, |
|
"mistral-large-instruct-2411": 0.8560000000000001, |
|
"gemma-2-27b-it": 0.544, |
|
"gemma-2-9b-it": 0.36400000000000005, |
|
"deepseek-v3": 0.9359999999999999, |
|
"deepseek-r1": 0.916, |
|
"qwq-32b": 0.9560000000000001, |
|
"Average": 0.6384 |
|
}, |
|
"HerbEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.304, |
|
"qwen2.5-14b-instruct": 0.784, |
|
"qwen2.5-32b-instruct": 0.8400000000000001, |
|
"qwen2.5-72b-instruct": 0.8039999999999999, |
|
"llama-3.1-8b-instruct": 0.30000000000000004, |
|
"llama-3.1-70b-instruct": 0.568, |
|
"llama-3.2-3b-instruct": 0.128, |
|
"llama-3.3-70b-instruct": 0.612, |
|
"mistral-large-instruct-2411": 0.76, |
|
"gemma-2-27b-it": 0.504, |
|
"gemma-2-9b-it": 0.18000000000000002, |
|
"deepseek-v3": 0.968, |
|
"deepseek-r1": 0.9359999999999999, |
|
"qwq-32b": 0.924, |
|
"Average": 0.5863999999999999 |
|
}, |
|
"TransdimensionalEnv": { |
|
"qwen2.5-3b-instruct": 0.156, |
|
"qwen2.5-7b-instruct": 0.38400000000000006, |
|
"qwen2.5-14b-instruct": 0.836, |
|
"qwen2.5-32b-instruct": 0.876, |
|
"qwen2.5-72b-instruct": 0.836, |
|
"llama-3.1-8b-instruct": 0.44399999999999995, |
|
"llama-3.1-70b-instruct": 0.828, |
|
"llama-3.2-3b-instruct": 0.172, |
|
"llama-3.3-70b-instruct": 0.86, |
|
"mistral-large-instruct-2411": 0.86, |
|
"gemma-2-27b-it": 0.5599999999999999, |
|
"gemma-2-9b-it": 0.516, |
|
"deepseek-v3": 0.968, |
|
"deepseek-r1": 0.9359999999999999, |
|
"qwq-32b": 0.968, |
|
"Average": 0.6799999999999999 |
|
}, |
|
"SorcererEnv": { |
|
"qwen2.5-3b-instruct": 0.16, |
|
"qwen2.5-7b-instruct": 0.32400000000000007, |
|
"qwen2.5-14b-instruct": 0.8039999999999999, |
|
"qwen2.5-32b-instruct": 0.8240000000000001, |
|
"qwen2.5-72b-instruct": 0.8320000000000001, |
|
"llama-3.1-8b-instruct": 0.276, |
|
"llama-3.1-70b-instruct": 0.6639999999999999, |
|
"llama-3.2-3b-instruct": 0.196, |
|
"llama-3.3-70b-instruct": 0.7360000000000001, |
|
"mistral-large-instruct-2411": 0.8, |
|
"gemma-2-27b-it": 0.5640000000000001, |
|
"gemma-2-9b-it": 0.28800000000000003, |
|
"deepseek-v3": 0.8640000000000001, |
|
"deepseek-r1": 0.8240000000000001, |
|
"qwq-32b": 0.8400000000000001, |
|
"Average": 0.5997333333333333 |
|
}, |
|
"QuantumEnv": { |
|
"qwen2.5-3b-instruct": 0.196, |
|
"qwen2.5-7b-instruct": 0.532, |
|
"qwen2.5-14b-instruct": 0.8720000000000001, |
|
"qwen2.5-32b-instruct": 0.9039999999999999, |
|
"qwen2.5-72b-instruct": 0.916, |
|
"llama-3.1-8b-instruct": 0.45600000000000007, |
|
"llama-3.1-70b-instruct": 0.7999999999999999, |
|
"llama-3.2-3b-instruct": 0.168, |
|
"llama-3.3-70b-instruct": 0.8480000000000001, |
|
"mistral-large-instruct-2411": 0.8720000000000001, |
|
"gemma-2-27b-it": 0.744, |
|
"gemma-2-9b-it": 0.544, |
|
"deepseek-v3": 0.884, |
|
"deepseek-r1": 0.8640000000000001, |
|
"qwq-32b": 0.868, |
|
"Average": 0.6978666666666666 |
|
}, |
|
"AstronomyEnv": { |
|
"qwen2.5-3b-instruct": 0.172, |
|
"qwen2.5-7b-instruct": 0.42800000000000005, |
|
"qwen2.5-14b-instruct": 0.716, |
|
"qwen2.5-32b-instruct": 0.676, |
|
"qwen2.5-72b-instruct": 0.748, |
|
"llama-3.1-8b-instruct": 0.336, |
|
"llama-3.1-70b-instruct": 0.692, |
|
"llama-3.2-3b-instruct": 0.176, |
|
"llama-3.3-70b-instruct": 0.6519999999999999, |
|
"mistral-large-instruct-2411": 0.7999999999999999, |
|
"gemma-2-27b-it": 0.508, |
|
"gemma-2-9b-it": 0.372, |
|
"deepseek-v3": 0.748, |
|
"deepseek-r1": 0.8200000000000001, |
|
"qwq-32b": 0.852, |
|
"Average": 0.5797333333333333 |
|
}, |
|
"MusicGenresEnv": { |
|
"qwen2.5-3b-instruct": 0.22000000000000003, |
|
"qwen2.5-7b-instruct": 0.42000000000000004, |
|
"qwen2.5-14b-instruct": 0.72, |
|
"qwen2.5-32b-instruct": 0.716, |
|
"qwen2.5-72b-instruct": 0.696, |
|
"llama-3.1-8b-instruct": 0.35200000000000004, |
|
"llama-3.1-70b-instruct": 0.6280000000000001, |
|
"llama-3.2-3b-instruct": 0.136, |
|
"llama-3.3-70b-instruct": 0.592, |
|
"mistral-large-instruct-2411": 0.732, |
|
"gemma-2-27b-it": 0.44800000000000006, |
|
"gemma-2-9b-it": 0.332, |
|
"deepseek-v3": 0.748, |
|
"deepseek-r1": 0.792, |
|
"qwq-32b": 0.876, |
|
"Average": 0.5605333333333334 |
|
}, |
|
"CloudEnv": { |
|
"qwen2.5-3b-instruct": 0.21199999999999997, |
|
"qwen2.5-7b-instruct": 0.42000000000000004, |
|
"qwen2.5-14b-instruct": 0.76, |
|
"qwen2.5-32b-instruct": 0.656, |
|
"qwen2.5-72b-instruct": 0.712, |
|
"llama-3.1-8b-instruct": 0.42000000000000004, |
|
"llama-3.1-70b-instruct": 0.664, |
|
"llama-3.2-3b-instruct": 0.22800000000000004, |
|
"llama-3.3-70b-instruct": 0.696, |
|
"mistral-large-instruct-2411": 0.8360000000000001, |
|
"gemma-2-27b-it": 0.6, |
|
"gemma-2-9b-it": 0.4, |
|
"deepseek-v3": 0.8200000000000001, |
|
"deepseek-r1": 0.908, |
|
"qwq-32b": 0.9120000000000001, |
|
"Average": 0.6162666666666667 |
|
}, |
|
"CuisineEnv": { |
|
"qwen2.5-3b-instruct": 0.21600000000000003, |
|
"qwen2.5-7b-instruct": 0.316, |
|
"qwen2.5-14b-instruct": 0.6960000000000001, |
|
"qwen2.5-32b-instruct": 0.664, |
|
"qwen2.5-72b-instruct": 0.656, |
|
"llama-3.1-8b-instruct": 0.22799999999999998, |
|
"llama-3.1-70b-instruct": 0.476, |
|
"llama-3.2-3b-instruct": 0.152, |
|
"llama-3.3-70b-instruct": 0.44400000000000006, |
|
"mistral-large-instruct-2411": 0.644, |
|
"gemma-2-27b-it": 0.27599999999999997, |
|
"gemma-2-9b-it": 0.156, |
|
"deepseek-v3": 0.8400000000000001, |
|
"deepseek-r1": 0.7959999999999999, |
|
"qwq-32b": 0.8800000000000001, |
|
"Average": 0.49599999999999994 |
|
}, |
|
"PlantEnv": { |
|
"qwen2.5-3b-instruct": 0.168, |
|
"qwen2.5-7b-instruct": 0.236, |
|
"qwen2.5-14b-instruct": 0.34, |
|
"qwen2.5-32b-instruct": 0.22000000000000003, |
|
"qwen2.5-72b-instruct": 0.22799999999999998, |
|
"llama-3.1-8b-instruct": 0.148, |
|
"llama-3.1-70b-instruct": 0.16, |
|
"llama-3.2-3b-instruct": 0.084, |
|
"llama-3.3-70b-instruct": 0.07599999999999998, |
|
"mistral-large-instruct-2411": 0.264, |
|
"gemma-2-27b-it": 0.14400000000000002, |
|
"gemma-2-9b-it": 0.092, |
|
"deepseek-v3": 0.512, |
|
"deepseek-r1": 0.5, |
|
"qwq-32b": 0.548, |
|
"Average": 0.24800000000000003 |
|
}, |
|
"HistoricalEnv": { |
|
"qwen2.5-3b-instruct": 0.24, |
|
"qwen2.5-7b-instruct": 0.368, |
|
"qwen2.5-14b-instruct": 0.5800000000000001, |
|
"qwen2.5-32b-instruct": 0.476, |
|
"qwen2.5-72b-instruct": 0.512, |
|
"llama-3.1-8b-instruct": 0.332, |
|
"llama-3.1-70b-instruct": 0.616, |
|
"llama-3.2-3b-instruct": 0.2, |
|
"llama-3.3-70b-instruct": 0.652, |
|
"mistral-large-instruct-2411": 0.6880000000000001, |
|
"gemma-2-27b-it": 0.5, |
|
"gemma-2-9b-it": 0.376, |
|
"deepseek-v3": 0.748, |
|
"deepseek-r1": 0.828, |
|
"qwq-32b": 0.884, |
|
"Average": 0.5333333333333334 |
|
}, |
|
"GadgetEnv": { |
|
"qwen2.5-3b-instruct": 0.124, |
|
"qwen2.5-7b-instruct": 0.312, |
|
"qwen2.5-14b-instruct": 0.852, |
|
"qwen2.5-32b-instruct": 0.8640000000000001, |
|
"qwen2.5-72b-instruct": 0.892, |
|
"llama-3.1-8b-instruct": 0.284, |
|
"llama-3.1-70b-instruct": 0.692, |
|
"llama-3.2-3b-instruct": 0.11200000000000002, |
|
"llama-3.3-70b-instruct": 0.7360000000000001, |
|
"mistral-large-instruct-2411": 0.884, |
|
"gemma-2-27b-it": 0.32799999999999996, |
|
"gemma-2-9b-it": 0.184, |
|
"deepseek-v3": 0.9640000000000001, |
|
"deepseek-r1": 0.932, |
|
"qwq-32b": 0.932, |
|
"Average": 0.6061333333333334 |
|
}, |
|
"TimeTravelEnv": { |
|
"qwen2.5-3b-instruct": 0.128, |
|
"qwen2.5-7b-instruct": 0.292, |
|
"qwen2.5-14b-instruct": 0.808, |
|
"qwen2.5-32b-instruct": 0.828, |
|
"qwen2.5-72b-instruct": 0.8039999999999999, |
|
"llama-3.1-8b-instruct": 0.376, |
|
"llama-3.1-70b-instruct": 0.684, |
|
"llama-3.2-3b-instruct": 0.124, |
|
"llama-3.3-70b-instruct": 0.716, |
|
"mistral-large-instruct-2411": 0.884, |
|
"gemma-2-27b-it": 0.32799999999999996, |
|
"gemma-2-9b-it": 0.21600000000000003, |
|
"deepseek-v3": 0.9399999999999998, |
|
"deepseek-r1": 0.932, |
|
"qwq-32b": 0.924, |
|
"Average": 0.5989333333333333 |
|
}, |
|
"PollutionEnv": { |
|
"qwen2.5-3b-instruct": 0.136, |
|
"qwen2.5-7b-instruct": 0.328, |
|
"qwen2.5-14b-instruct": 0.792, |
|
"qwen2.5-32b-instruct": 0.7120000000000001, |
|
"qwen2.5-72b-instruct": 0.704, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.664, |
|
"llama-3.2-3b-instruct": 0.124, |
|
"llama-3.3-70b-instruct": 0.6960000000000001, |
|
"mistral-large-instruct-2411": 0.784, |
|
"gemma-2-27b-it": 0.336, |
|
"gemma-2-9b-it": 0.252, |
|
"deepseek-v3": 0.8640000000000001, |
|
"deepseek-r1": 0.8560000000000001, |
|
"qwq-32b": 0.852, |
|
"Average": 0.5610666666666666 |
|
}, |
|
"DemographicEnv": { |
|
"qwen2.5-3b-instruct": 0.072, |
|
"qwen2.5-7b-instruct": 0.42800000000000005, |
|
"qwen2.5-14b-instruct": 0.68, |
|
"qwen2.5-32b-instruct": 0.7799999999999999, |
|
"qwen2.5-72b-instruct": 0.7719999999999999, |
|
"llama-3.1-8b-instruct": 0.272, |
|
"llama-3.1-70b-instruct": 0.6239999999999999, |
|
"llama-3.2-3b-instruct": 0.176, |
|
"llama-3.3-70b-instruct": 0.748, |
|
"mistral-large-instruct-2411": 0.8200000000000001, |
|
"gemma-2-27b-it": 0.356, |
|
"gemma-2-9b-it": 0.156, |
|
"deepseek-v3": 0.8960000000000001, |
|
"deepseek-r1": 0.876, |
|
"qwq-32b": 0.8960000000000001, |
|
"Average": 0.5701333333333333 |
|
}, |
|
"GeneticEnv": { |
|
"qwen2.5-3b-instruct": 0.084, |
|
"qwen2.5-7b-instruct": 0.392, |
|
"qwen2.5-14b-instruct": 0.884, |
|
"qwen2.5-32b-instruct": 0.9279999999999999, |
|
"qwen2.5-72b-instruct": 0.9400000000000001, |
|
"llama-3.1-8b-instruct": 0.45999999999999996, |
|
"llama-3.1-70b-instruct": 0.9, |
|
"llama-3.2-3b-instruct": 0.192, |
|
"llama-3.3-70b-instruct": 0.916, |
|
"mistral-large-instruct-2411": 0.9040000000000001, |
|
"gemma-2-27b-it": 0.776, |
|
"gemma-2-9b-it": 0.548, |
|
"deepseek-v3": 0.984, |
|
"deepseek-r1": 0.952, |
|
"qwq-32b": 0.932, |
|
"Average": 0.7194666666666667 |
|
}, |
|
"CraftsmanEnv": { |
|
"qwen2.5-3b-instruct": 0.14400000000000002, |
|
"qwen2.5-7b-instruct": 0.256, |
|
"qwen2.5-14b-instruct": 0.624, |
|
"qwen2.5-32b-instruct": 0.736, |
|
"qwen2.5-72b-instruct": 0.664, |
|
"llama-3.1-8b-instruct": 0.22000000000000003, |
|
"llama-3.1-70b-instruct": 0.524, |
|
"llama-3.2-3b-instruct": 0.10800000000000001, |
|
"llama-3.3-70b-instruct": 0.41600000000000004, |
|
"mistral-large-instruct-2411": 0.7080000000000001, |
|
"gemma-2-27b-it": 0.324, |
|
"gemma-2-9b-it": 0.096, |
|
"deepseek-v3": 0.9, |
|
"deepseek-r1": 0.7879999999999999, |
|
"qwq-32b": 0.8160000000000001, |
|
"Average": 0.4882666666666667 |
|
}, |
|
"StarConstellationEnv": { |
|
"qwen2.5-3b-instruct": 0.1, |
|
"qwen2.5-7b-instruct": 0.332, |
|
"qwen2.5-14b-instruct": 0.5960000000000001, |
|
"qwen2.5-32b-instruct": 0.572, |
|
"qwen2.5-72b-instruct": 0.5840000000000001, |
|
"llama-3.1-8b-instruct": 0.376, |
|
"llama-3.1-70b-instruct": 0.4640000000000001, |
|
"llama-3.2-3b-instruct": 0.136, |
|
"llama-3.3-70b-instruct": 0.41200000000000003, |
|
"mistral-large-instruct-2411": 0.6120000000000001, |
|
"gemma-2-27b-it": 0.472, |
|
"gemma-2-9b-it": 0.22799999999999998, |
|
"deepseek-v3": 0.744, |
|
"deepseek-r1": 0.748, |
|
"qwq-32b": 0.736, |
|
"Average": 0.47413333333333335 |
|
}, |
|
"MythicalCreatureEnv": { |
|
"qwen2.5-3b-instruct": 0.2, |
|
"qwen2.5-7b-instruct": 0.324, |
|
"qwen2.5-14b-instruct": 0.632, |
|
"qwen2.5-32b-instruct": 0.712, |
|
"qwen2.5-72b-instruct": 0.668, |
|
"llama-3.1-8b-instruct": 0.31200000000000006, |
|
"llama-3.1-70b-instruct": 0.62, |
|
"llama-3.2-3b-instruct": 0.11200000000000002, |
|
"llama-3.3-70b-instruct": 0.648, |
|
"mistral-large-instruct-2411": 0.7480000000000001, |
|
"gemma-2-27b-it": 0.42799999999999994, |
|
"gemma-2-9b-it": 0.268, |
|
"deepseek-v3": 0.8400000000000001, |
|
"deepseek-r1": 0.8400000000000001, |
|
"qwq-32b": 0.852, |
|
"Average": 0.5469333333333333 |
|
}, |
|
"ArtStyleEnv": { |
|
"qwen2.5-3b-instruct": 0.136, |
|
"qwen2.5-7b-instruct": 0.332, |
|
"qwen2.5-14b-instruct": 0.78, |
|
"qwen2.5-32b-instruct": 0.8320000000000001, |
|
"qwen2.5-72b-instruct": 0.748, |
|
"llama-3.1-8b-instruct": 0.356, |
|
"llama-3.1-70b-instruct": 0.616, |
|
"llama-3.2-3b-instruct": 0.17200000000000001, |
|
"llama-3.3-70b-instruct": 0.6199999999999999, |
|
"mistral-large-instruct-2411": 0.828, |
|
"gemma-2-27b-it": 0.43200000000000005, |
|
"gemma-2-9b-it": 0.256, |
|
"deepseek-v3": 0.876, |
|
"deepseek-r1": 0.8200000000000001, |
|
"qwq-32b": 0.868, |
|
"Average": 0.5781333333333335 |
|
}, |
|
"CookingEnv": { |
|
"qwen2.5-3b-instruct": 0.13999999999999999, |
|
"qwen2.5-7b-instruct": 0.44799999999999995, |
|
"qwen2.5-14b-instruct": 0.76, |
|
"qwen2.5-32b-instruct": 0.7440000000000001, |
|
"qwen2.5-72b-instruct": 0.7, |
|
"llama-3.1-8b-instruct": 0.364, |
|
"llama-3.1-70b-instruct": 0.6839999999999999, |
|
"llama-3.2-3b-instruct": 0.156, |
|
"llama-3.3-70b-instruct": 0.656, |
|
"mistral-large-instruct-2411": 0.74, |
|
"gemma-2-27b-it": 0.48, |
|
"gemma-2-9b-it": 0.364, |
|
"deepseek-v3": 0.8640000000000001, |
|
"deepseek-r1": 0.812, |
|
"qwq-32b": 0.9, |
|
"Average": 0.5874666666666666 |
|
}, |
|
"HistoricalBattleEnv": { |
|
"qwen2.5-3b-instruct": 0.256, |
|
"qwen2.5-7b-instruct": 0.292, |
|
"qwen2.5-14b-instruct": 0.45999999999999996, |
|
"qwen2.5-32b-instruct": 0.476, |
|
"qwen2.5-72b-instruct": 0.42400000000000004, |
|
"llama-3.1-8b-instruct": 0.28400000000000003, |
|
"llama-3.1-70b-instruct": 0.492, |
|
"llama-3.2-3b-instruct": 0.148, |
|
"llama-3.3-70b-instruct": 0.62, |
|
"mistral-large-instruct-2411": 0.608, |
|
"gemma-2-27b-it": 0.388, |
|
"gemma-2-9b-it": 0.34, |
|
"deepseek-v3": 0.724, |
|
"deepseek-r1": 0.788, |
|
"qwq-32b": 0.8560000000000001, |
|
"Average": 0.47706666666666664 |
|
}, |
|
"FungalEnv": { |
|
"qwen2.5-3b-instruct": 0.15999999999999998, |
|
"qwen2.5-7b-instruct": 0.46399999999999997, |
|
"qwen2.5-14b-instruct": 0.664, |
|
"qwen2.5-32b-instruct": 0.728, |
|
"qwen2.5-72b-instruct": 0.6839999999999999, |
|
"llama-3.1-8b-instruct": 0.41600000000000004, |
|
"llama-3.1-70b-instruct": 0.5840000000000001, |
|
"llama-3.2-3b-instruct": 0.14, |
|
"llama-3.3-70b-instruct": 0.644, |
|
"mistral-large-instruct-2411": 0.7440000000000001, |
|
"gemma-2-27b-it": 0.536, |
|
"gemma-2-9b-it": 0.184, |
|
"deepseek-v3": 0.844, |
|
"deepseek-r1": 0.764, |
|
"qwq-32b": 0.7879999999999999, |
|
"Average": 0.5562666666666666 |
|
}, |
|
"CryptographyEnv": { |
|
"qwen2.5-3b-instruct": 0.24000000000000005, |
|
"qwen2.5-7b-instruct": 0.23199999999999998, |
|
"qwen2.5-14b-instruct": 0.508, |
|
"qwen2.5-32b-instruct": 0.5760000000000001, |
|
"qwen2.5-72b-instruct": 0.528, |
|
"llama-3.1-8b-instruct": 0.29600000000000004, |
|
"llama-3.1-70b-instruct": 0.524, |
|
"llama-3.2-3b-instruct": 0.11600000000000002, |
|
"llama-3.3-70b-instruct": 0.512, |
|
"mistral-large-instruct-2411": 0.6799999999999999, |
|
"gemma-2-27b-it": 0.328, |
|
"gemma-2-9b-it": 0.192, |
|
"deepseek-v3": 0.784, |
|
"deepseek-r1": 0.74, |
|
"qwq-32b": 0.8480000000000001, |
|
"Average": 0.4736 |
|
}, |
|
"StorageEnv": { |
|
"qwen2.5-3b-instruct": 0.22800000000000004, |
|
"qwen2.5-7b-instruct": 0.44000000000000006, |
|
"qwen2.5-14b-instruct": 0.852, |
|
"qwen2.5-32b-instruct": 0.884, |
|
"qwen2.5-72b-instruct": 0.8119999999999999, |
|
"llama-3.1-8b-instruct": 0.34800000000000003, |
|
"llama-3.1-70b-instruct": 0.724, |
|
"llama-3.2-3b-instruct": 0.21600000000000003, |
|
"llama-3.3-70b-instruct": 0.796, |
|
"mistral-large-instruct-2411": 0.8880000000000001, |
|
"gemma-2-27b-it": 0.596, |
|
"gemma-2-9b-it": 0.392, |
|
"deepseek-v3": 0.9640000000000001, |
|
"deepseek-r1": 0.9119999999999999, |
|
"qwq-32b": 0.944, |
|
"Average": 0.6663999999999999 |
|
}, |
|
"RoverEnv": { |
|
"qwen2.5-3b-instruct": 0.14400000000000002, |
|
"qwen2.5-7b-instruct": 0.236, |
|
"qwen2.5-14b-instruct": 0.8480000000000001, |
|
"qwen2.5-32b-instruct": 0.8360000000000001, |
|
"qwen2.5-72b-instruct": 0.796, |
|
"llama-3.1-8b-instruct": 0.28400000000000003, |
|
"llama-3.1-70b-instruct": 0.612, |
|
"llama-3.2-3b-instruct": 0.148, |
|
"llama-3.3-70b-instruct": 0.724, |
|
"mistral-large-instruct-2411": 0.828, |
|
"gemma-2-27b-it": 0.4600000000000001, |
|
"gemma-2-9b-it": 0.072, |
|
"deepseek-v3": 0.9200000000000002, |
|
"deepseek-r1": 0.9, |
|
"qwq-32b": 0.8720000000000001, |
|
"Average": 0.5786666666666668 |
|
}, |
|
"FashionEnv": { |
|
"qwen2.5-3b-instruct": 0.17200000000000001, |
|
"qwen2.5-7b-instruct": 0.304, |
|
"qwen2.5-14b-instruct": 0.8240000000000001, |
|
"qwen2.5-32b-instruct": 0.808, |
|
"qwen2.5-72b-instruct": 0.768, |
|
"llama-3.1-8b-instruct": 0.32, |
|
"llama-3.1-70b-instruct": 0.6, |
|
"llama-3.2-3b-instruct": 0.16399999999999998, |
|
"llama-3.3-70b-instruct": 0.6160000000000001, |
|
"mistral-large-instruct-2411": 0.756, |
|
"gemma-2-27b-it": 0.524, |
|
"gemma-2-9b-it": 0.292, |
|
"deepseek-v3": 0.86, |
|
"deepseek-r1": 0.756, |
|
"qwq-32b": 0.86, |
|
"Average": 0.5749333333333334 |
|
}, |
|
"LicenseEnv": { |
|
"qwen2.5-3b-instruct": 0.196, |
|
"qwen2.5-7b-instruct": 0.29200000000000004, |
|
"qwen2.5-14b-instruct": 0.556, |
|
"qwen2.5-32b-instruct": 0.44000000000000006, |
|
"qwen2.5-72b-instruct": 0.484, |
|
"llama-3.1-8b-instruct": 0.26, |
|
"llama-3.1-70b-instruct": 0.496, |
|
"llama-3.2-3b-instruct": 0.072, |
|
"llama-3.3-70b-instruct": 0.45999999999999996, |
|
"mistral-large-instruct-2411": 0.504, |
|
"gemma-2-27b-it": 0.37600000000000006, |
|
"gemma-2-9b-it": 0.296, |
|
"deepseek-v3": 0.556, |
|
"deepseek-r1": 0.52, |
|
"qwq-32b": 0.5800000000000001, |
|
"Average": 0.4058666666666667 |
|
}, |
|
"VirusClassificationEnv": { |
|
"qwen2.5-3b-instruct": 0.22000000000000003, |
|
"qwen2.5-7b-instruct": 0.28, |
|
"qwen2.5-14b-instruct": 0.384, |
|
"qwen2.5-32b-instruct": 0.38, |
|
"qwen2.5-72b-instruct": 0.42800000000000005, |
|
"llama-3.1-8b-instruct": 0.256, |
|
"llama-3.1-70b-instruct": 0.332, |
|
"llama-3.2-3b-instruct": 0.156, |
|
"llama-3.3-70b-instruct": 0.396, |
|
"mistral-large-instruct-2411": 0.532, |
|
"gemma-2-27b-it": 0.34, |
|
"gemma-2-9b-it": 0.31200000000000006, |
|
"deepseek-v3": 0.536, |
|
"deepseek-r1": 0.64, |
|
"qwq-32b": 0.684, |
|
"Average": 0.3917333333333333 |
|
}, |
|
"TestingEnv": { |
|
"qwen2.5-3b-instruct": 0.19200000000000003, |
|
"qwen2.5-7b-instruct": 0.22000000000000003, |
|
"qwen2.5-14b-instruct": 0.608, |
|
"qwen2.5-32b-instruct": 0.648, |
|
"qwen2.5-72b-instruct": 0.708, |
|
"llama-3.1-8b-instruct": 0.332, |
|
"llama-3.1-70b-instruct": 0.68, |
|
"llama-3.2-3b-instruct": 0.17200000000000001, |
|
"llama-3.3-70b-instruct": 0.7040000000000001, |
|
"mistral-large-instruct-2411": 0.764, |
|
"gemma-2-27b-it": 0.22799999999999998, |
|
"gemma-2-9b-it": 0.26, |
|
"deepseek-v3": 0.8880000000000001, |
|
"deepseek-r1": 0.764, |
|
"qwq-32b": 0.7999999999999999, |
|
"Average": 0.5312 |
|
}, |
|
"NarrativeDetectEnv": { |
|
"qwen2.5-3b-instruct": 0.148, |
|
"qwen2.5-7b-instruct": 0.30000000000000004, |
|
"qwen2.5-14b-instruct": 0.552, |
|
"qwen2.5-32b-instruct": 0.8440000000000001, |
|
"qwen2.5-72b-instruct": 0.76, |
|
"llama-3.1-8b-instruct": 0.28800000000000003, |
|
"llama-3.1-70b-instruct": 0.6279999999999999, |
|
"llama-3.2-3b-instruct": 0.10400000000000001, |
|
"llama-3.3-70b-instruct": 0.704, |
|
"mistral-large-instruct-2411": 0.7919999999999999, |
|
"gemma-2-27b-it": 0.328, |
|
"gemma-2-9b-it": 0.192, |
|
"deepseek-v3": 0.8560000000000001, |
|
"deepseek-r1": 0.748, |
|
"qwq-32b": 0.784, |
|
"Average": 0.5352 |
|
}, |
|
"RenewableEnergyEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.44399999999999995, |
|
"qwen2.5-14b-instruct": 0.648, |
|
"qwen2.5-32b-instruct": 0.932, |
|
"qwen2.5-72b-instruct": 0.8880000000000001, |
|
"llama-3.1-8b-instruct": 0.396, |
|
"llama-3.1-70b-instruct": 0.812, |
|
"llama-3.2-3b-instruct": 0.2, |
|
"llama-3.3-70b-instruct": 0.8240000000000001, |
|
"mistral-large-instruct-2411": 0.8560000000000001, |
|
"gemma-2-27b-it": 0.348, |
|
"gemma-2-9b-it": 0.188, |
|
"deepseek-v3": 0.96, |
|
"deepseek-r1": 0.9800000000000001, |
|
"qwq-32b": 0.9800000000000001, |
|
"Average": 0.6426666666666667 |
|
}, |
|
"CelestialEnv": { |
|
"qwen2.5-3b-instruct": 0.20400000000000001, |
|
"qwen2.5-7b-instruct": 0.252, |
|
"qwen2.5-14b-instruct": 0.728, |
|
"qwen2.5-32b-instruct": 0.792, |
|
"qwen2.5-72b-instruct": 0.7239999999999999, |
|
"llama-3.1-8b-instruct": 0.256, |
|
"llama-3.1-70b-instruct": 0.6920000000000001, |
|
"llama-3.2-3b-instruct": 0.192, |
|
"llama-3.3-70b-instruct": 0.744, |
|
"mistral-large-instruct-2411": 0.82, |
|
"gemma-2-27b-it": 0.528, |
|
"gemma-2-9b-it": 0.344, |
|
"deepseek-v3": 0.8480000000000001, |
|
"deepseek-r1": 0.8360000000000001, |
|
"qwq-32b": 0.8879999999999999, |
|
"Average": 0.5898666666666668 |
|
}, |
|
"SpiceEnv": { |
|
"qwen2.5-3b-instruct": 0.21199999999999997, |
|
"qwen2.5-7b-instruct": 0.332, |
|
"qwen2.5-14b-instruct": 0.672, |
|
"qwen2.5-32b-instruct": 0.476, |
|
"qwen2.5-72b-instruct": 0.5880000000000001, |
|
"llama-3.1-8b-instruct": 0.32799999999999996, |
|
"llama-3.1-70b-instruct": 0.40800000000000003, |
|
"llama-3.2-3b-instruct": 0.22000000000000003, |
|
"llama-3.3-70b-instruct": 0.336, |
|
"mistral-large-instruct-2411": 0.5800000000000001, |
|
"gemma-2-27b-it": 0.28400000000000003, |
|
"gemma-2-9b-it": 0.172, |
|
"deepseek-v3": 0.908, |
|
"deepseek-r1": 0.7679999999999999, |
|
"qwq-32b": 0.8720000000000001, |
|
"Average": 0.47706666666666664 |
|
}, |
|
"WildlifeEnv": { |
|
"qwen2.5-3b-instruct": 0.21600000000000003, |
|
"qwen2.5-7b-instruct": 0.352, |
|
"qwen2.5-14b-instruct": 0.644, |
|
"qwen2.5-32b-instruct": 0.592, |
|
"qwen2.5-72b-instruct": 0.616, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.544, |
|
"llama-3.2-3b-instruct": 0.23199999999999998, |
|
"llama-3.3-70b-instruct": 0.616, |
|
"mistral-large-instruct-2411": 0.628, |
|
"gemma-2-27b-it": 0.45199999999999996, |
|
"gemma-2-9b-it": 0.344, |
|
"deepseek-v3": 0.736, |
|
"deepseek-r1": 0.6040000000000001, |
|
"qwq-32b": 0.716, |
|
"Average": 0.5072 |
|
}, |
|
"VehicleEnv": { |
|
"qwen2.5-3b-instruct": 0.172, |
|
"qwen2.5-7b-instruct": 0.308, |
|
"qwen2.5-14b-instruct": 0.54, |
|
"qwen2.5-32b-instruct": 0.776, |
|
"qwen2.5-72b-instruct": 0.78, |
|
"llama-3.1-8b-instruct": 0.248, |
|
"llama-3.1-70b-instruct": 0.62, |
|
"llama-3.2-3b-instruct": 0.152, |
|
"llama-3.3-70b-instruct": 0.6960000000000001, |
|
"mistral-large-instruct-2411": 0.8800000000000001, |
|
"gemma-2-27b-it": 0.44799999999999995, |
|
"gemma-2-9b-it": 0.248, |
|
"deepseek-v3": 0.9199999999999999, |
|
"deepseek-r1": 0.9199999999999999, |
|
"qwq-32b": 0.916, |
|
"Average": 0.5749333333333333 |
|
}, |
|
"BeverageEnv": { |
|
"qwen2.5-3b-instruct": 0.128, |
|
"qwen2.5-7b-instruct": 0.296, |
|
"qwen2.5-14b-instruct": 0.792, |
|
"qwen2.5-32b-instruct": 0.6880000000000001, |
|
"qwen2.5-72b-instruct": 0.724, |
|
"llama-3.1-8b-instruct": 0.41200000000000003, |
|
"llama-3.1-70b-instruct": 0.6199999999999999, |
|
"llama-3.2-3b-instruct": 0.16399999999999998, |
|
"llama-3.3-70b-instruct": 0.5800000000000001, |
|
"mistral-large-instruct-2411": 0.748, |
|
"gemma-2-27b-it": 0.40800000000000003, |
|
"gemma-2-9b-it": 0.296, |
|
"deepseek-v3": 0.8800000000000001, |
|
"deepseek-r1": 0.7520000000000001, |
|
"qwq-32b": 0.844, |
|
"Average": 0.5554666666666667 |
|
}, |
|
"ControlEnv": { |
|
"qwen2.5-3b-instruct": 0.12800000000000003, |
|
"qwen2.5-7b-instruct": 0.364, |
|
"qwen2.5-14b-instruct": 0.68, |
|
"qwen2.5-32b-instruct": 0.8320000000000001, |
|
"qwen2.5-72b-instruct": 0.8400000000000001, |
|
"llama-3.1-8b-instruct": 0.364, |
|
"llama-3.1-70b-instruct": 0.656, |
|
"llama-3.2-3b-instruct": 0.15599999999999997, |
|
"llama-3.3-70b-instruct": 0.6320000000000001, |
|
"mistral-large-instruct-2411": 0.784, |
|
"gemma-2-27b-it": 0.4640000000000001, |
|
"gemma-2-9b-it": 0.18, |
|
"deepseek-v3": 0.9119999999999999, |
|
"deepseek-r1": 0.9119999999999999, |
|
"qwq-32b": 0.932, |
|
"Average": 0.5890666666666665 |
|
}, |
|
"CurrencyEnv": { |
|
"qwen2.5-3b-instruct": 0.252, |
|
"qwen2.5-7b-instruct": 0.392, |
|
"qwen2.5-14b-instruct": 0.8560000000000001, |
|
"qwen2.5-32b-instruct": 0.884, |
|
"qwen2.5-72b-instruct": 0.836, |
|
"llama-3.1-8b-instruct": 0.476, |
|
"llama-3.1-70b-instruct": 0.7520000000000001, |
|
"llama-3.2-3b-instruct": 0.22400000000000003, |
|
"llama-3.3-70b-instruct": 0.7000000000000001, |
|
"mistral-large-instruct-2411": 0.8960000000000001, |
|
"gemma-2-27b-it": 0.68, |
|
"gemma-2-9b-it": 0.196, |
|
"deepseek-v3": 0.9800000000000001, |
|
"deepseek-r1": 0.932, |
|
"qwq-32b": 0.9640000000000001, |
|
"Average": 0.668 |
|
}, |
|
"MarketingEnv": { |
|
"qwen2.5-3b-instruct": 0.12, |
|
"qwen2.5-7b-instruct": 0.34400000000000003, |
|
"qwen2.5-14b-instruct": 0.524, |
|
"qwen2.5-32b-instruct": 0.7479999999999999, |
|
"qwen2.5-72b-instruct": 0.732, |
|
"llama-3.1-8b-instruct": 0.30800000000000005, |
|
"llama-3.1-70b-instruct": 0.7040000000000001, |
|
"llama-3.2-3b-instruct": 0.14400000000000002, |
|
"llama-3.3-70b-instruct": 0.7639999999999999, |
|
"mistral-large-instruct-2411": 0.7600000000000001, |
|
"gemma-2-27b-it": 0.32400000000000007, |
|
"gemma-2-9b-it": 0.184, |
|
"deepseek-v3": 0.812, |
|
"deepseek-r1": 0.7959999999999999, |
|
"qwq-32b": 0.8320000000000001, |
|
"Average": 0.5397333333333333 |
|
}, |
|
"BotanicalEnv": { |
|
"qwen2.5-3b-instruct": 0.18800000000000003, |
|
"qwen2.5-7b-instruct": 0.316, |
|
"qwen2.5-14b-instruct": 0.9119999999999999, |
|
"qwen2.5-32b-instruct": 0.884, |
|
"qwen2.5-72b-instruct": 0.9039999999999999, |
|
"llama-3.1-8b-instruct": 0.4119999999999999, |
|
"llama-3.1-70b-instruct": 0.836, |
|
"llama-3.2-3b-instruct": 0.23600000000000004, |
|
"llama-3.3-70b-instruct": 0.8480000000000001, |
|
"mistral-large-instruct-2411": 0.8640000000000001, |
|
"gemma-2-27b-it": 0.604, |
|
"gemma-2-9b-it": 0.264, |
|
"deepseek-v3": 0.9040000000000001, |
|
"deepseek-r1": 0.9399999999999998, |
|
"qwq-32b": 0.968, |
|
"Average": 0.672 |
|
}, |
|
"CircusActEnv": { |
|
"qwen2.5-3b-instruct": 0.17200000000000001, |
|
"qwen2.5-7b-instruct": 0.32399999999999995, |
|
"qwen2.5-14b-instruct": 0.64, |
|
"qwen2.5-32b-instruct": 0.712, |
|
"qwen2.5-72b-instruct": 0.768, |
|
"llama-3.1-8b-instruct": 0.276, |
|
"llama-3.1-70b-instruct": 0.648, |
|
"llama-3.2-3b-instruct": 0.176, |
|
"llama-3.3-70b-instruct": 0.62, |
|
"mistral-large-instruct-2411": 0.748, |
|
"gemma-2-27b-it": 0.384, |
|
"gemma-2-9b-it": 0.29600000000000004, |
|
"deepseek-v3": 0.8640000000000001, |
|
"deepseek-r1": 0.82, |
|
"qwq-32b": 0.8720000000000001, |
|
"Average": 0.5546666666666668 |
|
}, |
|
"AudioDialectEnv": { |
|
"qwen2.5-3b-instruct": 0.128, |
|
"qwen2.5-7b-instruct": 0.312, |
|
"qwen2.5-14b-instruct": 0.5800000000000001, |
|
"qwen2.5-32b-instruct": 0.6, |
|
"qwen2.5-72b-instruct": 0.528, |
|
"llama-3.1-8b-instruct": 0.21600000000000003, |
|
"llama-3.1-70b-instruct": 0.4, |
|
"llama-3.2-3b-instruct": 0.132, |
|
"llama-3.3-70b-instruct": 0.32399999999999995, |
|
"mistral-large-instruct-2411": 0.68, |
|
"gemma-2-27b-it": 0.28, |
|
"gemma-2-9b-it": 0.11600000000000002, |
|
"deepseek-v3": 0.7520000000000001, |
|
"deepseek-r1": 0.7919999999999999, |
|
"qwq-32b": 0.8119999999999999, |
|
"Average": 0.4434666666666666 |
|
}, |
|
"LeadershipEnv": { |
|
"qwen2.5-3b-instruct": 0.164, |
|
"qwen2.5-7b-instruct": 0.372, |
|
"qwen2.5-14b-instruct": 0.7, |
|
"qwen2.5-32b-instruct": 0.732, |
|
"qwen2.5-72b-instruct": 0.7639999999999999, |
|
"llama-3.1-8b-instruct": 0.364, |
|
"llama-3.1-70b-instruct": 0.708, |
|
"llama-3.2-3b-instruct": 0.128, |
|
"llama-3.3-70b-instruct": 0.6920000000000001, |
|
"mistral-large-instruct-2411": 0.728, |
|
"gemma-2-27b-it": 0.46799999999999997, |
|
"gemma-2-9b-it": 0.20400000000000001, |
|
"deepseek-v3": 0.8200000000000001, |
|
"deepseek-r1": 0.748, |
|
"qwq-32b": 0.828, |
|
"Average": 0.5613333333333334 |
|
}, |
|
"TransportEnv": { |
|
"qwen2.5-3b-instruct": 0.196, |
|
"qwen2.5-7b-instruct": 0.372, |
|
"qwen2.5-14b-instruct": 0.716, |
|
"qwen2.5-32b-instruct": 0.732, |
|
"qwen2.5-72b-instruct": 0.8, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.648, |
|
"llama-3.2-3b-instruct": 0.15200000000000002, |
|
"llama-3.3-70b-instruct": 0.6000000000000001, |
|
"mistral-large-instruct-2411": 0.7879999999999999, |
|
"gemma-2-27b-it": 0.44399999999999995, |
|
"gemma-2-9b-it": 0.364, |
|
"deepseek-v3": 0.8640000000000001, |
|
"deepseek-r1": 0.8240000000000001, |
|
"qwq-32b": 0.9199999999999999, |
|
"Average": 0.5824 |
|
}, |
|
"EcologicalEnv": { |
|
"qwen2.5-3b-instruct": 0.152, |
|
"qwen2.5-7b-instruct": 0.45600000000000007, |
|
"qwen2.5-14b-instruct": 0.748, |
|
"qwen2.5-32b-instruct": 0.82, |
|
"qwen2.5-72b-instruct": 0.792, |
|
"llama-3.1-8b-instruct": 0.42000000000000004, |
|
"llama-3.1-70b-instruct": 0.692, |
|
"llama-3.2-3b-instruct": 0.21600000000000003, |
|
"llama-3.3-70b-instruct": 0.64, |
|
"mistral-large-instruct-2411": 0.772, |
|
"gemma-2-27b-it": 0.5680000000000001, |
|
"gemma-2-9b-it": 0.46799999999999997, |
|
"deepseek-v3": 0.868, |
|
"deepseek-r1": 0.8720000000000001, |
|
"qwq-32b": 0.8879999999999999, |
|
"Average": 0.6248 |
|
}, |
|
"MythicEnv": { |
|
"qwen2.5-3b-instruct": 0.132, |
|
"qwen2.5-7b-instruct": 0.36, |
|
"qwen2.5-14b-instruct": 0.744, |
|
"qwen2.5-32b-instruct": 0.74, |
|
"qwen2.5-72b-instruct": 0.672, |
|
"llama-3.1-8b-instruct": 0.236, |
|
"llama-3.1-70b-instruct": 0.596, |
|
"llama-3.2-3b-instruct": 0.12, |
|
"llama-3.3-70b-instruct": 0.576, |
|
"mistral-large-instruct-2411": 0.6960000000000001, |
|
"gemma-2-27b-it": 0.45599999999999996, |
|
"gemma-2-9b-it": 0.136, |
|
"deepseek-v3": 0.8960000000000001, |
|
"deepseek-r1": 0.8720000000000001, |
|
"qwq-32b": 0.8400000000000001, |
|
"Average": 0.5381333333333332 |
|
}, |
|
"EnzymeEnv": { |
|
"qwen2.5-3b-instruct": 0.252, |
|
"qwen2.5-7b-instruct": 0.43200000000000005, |
|
"qwen2.5-14b-instruct": 0.636, |
|
"qwen2.5-32b-instruct": 0.676, |
|
"qwen2.5-72b-instruct": 0.676, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.552, |
|
"llama-3.2-3b-instruct": 0.192, |
|
"llama-3.3-70b-instruct": 0.5640000000000001, |
|
"mistral-large-instruct-2411": 0.732, |
|
"gemma-2-27b-it": 0.43600000000000005, |
|
"gemma-2-9b-it": 0.264, |
|
"deepseek-v3": 0.8400000000000001, |
|
"deepseek-r1": 0.76, |
|
"qwq-32b": 0.804, |
|
"Average": 0.5421333333333334 |
|
}, |
|
"OSKernelEnv": { |
|
"qwen2.5-3b-instruct": 0.192, |
|
"qwen2.5-7b-instruct": 0.28400000000000003, |
|
"qwen2.5-14b-instruct": 0.8119999999999999, |
|
"qwen2.5-32b-instruct": 0.784, |
|
"qwen2.5-72b-instruct": 0.788, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.6920000000000001, |
|
"llama-3.2-3b-instruct": 0.128, |
|
"llama-3.3-70b-instruct": 0.74, |
|
"mistral-large-instruct-2411": 0.8559999999999999, |
|
"gemma-2-27b-it": 0.46399999999999997, |
|
"gemma-2-9b-it": 0.2, |
|
"deepseek-v3": 0.9480000000000001, |
|
"deepseek-r1": 0.96, |
|
"qwq-32b": 0.984, |
|
"Average": 0.6098666666666668 |
|
}, |
|
"MineralClassificationEnv": { |
|
"qwen2.5-3b-instruct": 0.11600000000000002, |
|
"qwen2.5-7b-instruct": 0.248, |
|
"qwen2.5-14b-instruct": 0.8320000000000001, |
|
"qwen2.5-32b-instruct": 0.9040000000000001, |
|
"qwen2.5-72b-instruct": 0.884, |
|
"llama-3.1-8b-instruct": 0.384, |
|
"llama-3.1-70b-instruct": 0.8240000000000001, |
|
"llama-3.2-3b-instruct": 0.14800000000000002, |
|
"llama-3.3-70b-instruct": 0.8960000000000001, |
|
"mistral-large-instruct-2411": 0.908, |
|
"gemma-2-27b-it": 0.508, |
|
"gemma-2-9b-it": 0.268, |
|
"deepseek-v3": 0.984, |
|
"deepseek-r1": 0.9199999999999999, |
|
"qwq-32b": 0.9640000000000001, |
|
"Average": 0.6525333333333333 |
|
}, |
|
"EconomicEnv": { |
|
"qwen2.5-3b-instruct": 0.136, |
|
"qwen2.5-7b-instruct": 0.24, |
|
"qwen2.5-14b-instruct": 0.8560000000000001, |
|
"qwen2.5-32b-instruct": 0.9199999999999999, |
|
"qwen2.5-72b-instruct": 0.8960000000000001, |
|
"llama-3.1-8b-instruct": 0.43600000000000005, |
|
"llama-3.1-70b-instruct": 0.808, |
|
"llama-3.2-3b-instruct": 0.152, |
|
"llama-3.3-70b-instruct": 0.8240000000000001, |
|
"mistral-large-instruct-2411": 0.924, |
|
"gemma-2-27b-it": 0.45199999999999996, |
|
"gemma-2-9b-it": 0.36, |
|
"deepseek-v3": 0.9559999999999998, |
|
"deepseek-r1": 0.9359999999999999, |
|
"qwq-32b": 0.9719999999999999, |
|
"Average": 0.6578666666666667 |
|
}, |
|
"DetectiveEnv": { |
|
"qwen2.5-3b-instruct": 0.168, |
|
"qwen2.5-7b-instruct": 0.38, |
|
"qwen2.5-14b-instruct": 0.836, |
|
"qwen2.5-32b-instruct": 0.884, |
|
"qwen2.5-72b-instruct": 0.8480000000000001, |
|
"llama-3.1-8b-instruct": 0.34800000000000003, |
|
"llama-3.1-70b-instruct": 0.74, |
|
"llama-3.2-3b-instruct": 0.248, |
|
"llama-3.3-70b-instruct": 0.792, |
|
"mistral-large-instruct-2411": 0.8960000000000001, |
|
"gemma-2-27b-it": 0.512, |
|
"gemma-2-9b-it": 0.33199999999999996, |
|
"deepseek-v3": 0.976, |
|
"deepseek-r1": 0.9640000000000001, |
|
"qwq-32b": 0.984, |
|
"Average": 0.6605333333333333 |
|
}, |
|
"ChessEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.27999999999999997, |
|
"qwen2.5-14b-instruct": 0.592, |
|
"qwen2.5-32b-instruct": 0.616, |
|
"qwen2.5-72b-instruct": 0.5720000000000001, |
|
"llama-3.1-8b-instruct": 0.188, |
|
"llama-3.1-70b-instruct": 0.6639999999999999, |
|
"llama-3.2-3b-instruct": 0.084, |
|
"llama-3.3-70b-instruct": 0.6280000000000001, |
|
"mistral-large-instruct-2411": 0.744, |
|
"gemma-2-27b-it": 0.30000000000000004, |
|
"gemma-2-9b-it": 0.096, |
|
"deepseek-v3": 0.696, |
|
"deepseek-r1": 0.6519999999999999, |
|
"qwq-32b": 0.664, |
|
"Average": 0.4639999999999999 |
|
}, |
|
"MythicalEnv": { |
|
"qwen2.5-3b-instruct": 0.2, |
|
"qwen2.5-7b-instruct": 0.336, |
|
"qwen2.5-14b-instruct": 0.8039999999999999, |
|
"qwen2.5-32b-instruct": 0.712, |
|
"qwen2.5-72b-instruct": 0.632, |
|
"llama-3.1-8b-instruct": 0.356, |
|
"llama-3.1-70b-instruct": 0.54, |
|
"llama-3.2-3b-instruct": 0.16, |
|
"llama-3.3-70b-instruct": 0.556, |
|
"mistral-large-instruct-2411": 0.728, |
|
"gemma-2-27b-it": 0.54, |
|
"gemma-2-9b-it": 0.404, |
|
"deepseek-v3": 0.9279999999999999, |
|
"deepseek-r1": 0.8959999999999999, |
|
"qwq-32b": 0.876, |
|
"Average": 0.5778666666666666 |
|
}, |
|
"ChemicalCompoundsEnv": { |
|
"qwen2.5-3b-instruct": 0.18, |
|
"qwen2.5-7b-instruct": 0.252, |
|
"qwen2.5-14b-instruct": 0.40800000000000003, |
|
"qwen2.5-32b-instruct": 0.30000000000000004, |
|
"qwen2.5-72b-instruct": 0.28400000000000003, |
|
"llama-3.1-8b-instruct": 0.148, |
|
"llama-3.1-70b-instruct": 0.28, |
|
"llama-3.2-3b-instruct": 0.14, |
|
"llama-3.3-70b-instruct": 0.18000000000000002, |
|
"mistral-large-instruct-2411": 0.43200000000000005, |
|
"gemma-2-27b-it": 0.23200000000000004, |
|
"gemma-2-9b-it": 0.13599999999999998, |
|
"deepseek-v3": 0.46799999999999997, |
|
"deepseek-r1": 0.624, |
|
"qwq-32b": 0.752, |
|
"Average": 0.32106666666666667 |
|
}, |
|
"ArchitecturalEnv": { |
|
"qwen2.5-3b-instruct": 0.20400000000000001, |
|
"qwen2.5-7b-instruct": 0.316, |
|
"qwen2.5-14b-instruct": 0.72, |
|
"qwen2.5-32b-instruct": 0.66, |
|
"qwen2.5-72b-instruct": 0.7120000000000001, |
|
"llama-3.1-8b-instruct": 0.256, |
|
"llama-3.1-70b-instruct": 0.556, |
|
"llama-3.2-3b-instruct": 0.132, |
|
"llama-3.3-70b-instruct": 0.508, |
|
"mistral-large-instruct-2411": 0.724, |
|
"gemma-2-27b-it": 0.488, |
|
"gemma-2-9b-it": 0.236, |
|
"deepseek-v3": 0.82, |
|
"deepseek-r1": 0.744, |
|
"qwq-32b": 0.8240000000000001, |
|
"Average": 0.5266666666666666 |
|
}, |
|
"ComputationEnv": { |
|
"qwen2.5-3b-instruct": 0.152, |
|
"qwen2.5-7b-instruct": 0.248, |
|
"qwen2.5-14b-instruct": 0.76, |
|
"qwen2.5-32b-instruct": 0.884, |
|
"qwen2.5-72b-instruct": 0.8560000000000001, |
|
"llama-3.1-8b-instruct": 0.32799999999999996, |
|
"llama-3.1-70b-instruct": 0.788, |
|
"llama-3.2-3b-instruct": 0.13999999999999999, |
|
"llama-3.3-70b-instruct": 0.8560000000000001, |
|
"mistral-large-instruct-2411": 0.828, |
|
"gemma-2-27b-it": 0.45199999999999996, |
|
"gemma-2-9b-it": 0.252, |
|
"deepseek-v3": 0.96, |
|
"deepseek-r1": 0.9399999999999998, |
|
"qwq-32b": 0.908, |
|
"Average": 0.6234666666666667 |
|
}, |
|
"MachinePartEnv": { |
|
"qwen2.5-3b-instruct": 0.14, |
|
"qwen2.5-7b-instruct": 0.32, |
|
"qwen2.5-14b-instruct": 0.8240000000000001, |
|
"qwen2.5-32b-instruct": 0.8800000000000001, |
|
"qwen2.5-72b-instruct": 0.828, |
|
"llama-3.1-8b-instruct": 0.376, |
|
"llama-3.1-70b-instruct": 0.8200000000000001, |
|
"llama-3.2-3b-instruct": 0.168, |
|
"llama-3.3-70b-instruct": 0.8960000000000001, |
|
"mistral-large-instruct-2411": 0.876, |
|
"gemma-2-27b-it": 0.508, |
|
"gemma-2-9b-it": 0.268, |
|
"deepseek-v3": 0.9719999999999999, |
|
"deepseek-r1": 0.952, |
|
"qwq-32b": 0.916, |
|
"Average": 0.6496 |
|
}, |
|
"LiteraryEnv": { |
|
"qwen2.5-3b-instruct": 0.10400000000000001, |
|
"qwen2.5-7b-instruct": 0.328, |
|
"qwen2.5-14b-instruct": 0.8800000000000001, |
|
"qwen2.5-32b-instruct": 0.9279999999999999, |
|
"qwen2.5-72b-instruct": 0.9, |
|
"llama-3.1-8b-instruct": 0.336, |
|
"llama-3.1-70b-instruct": 0.664, |
|
"llama-3.2-3b-instruct": 0.13999999999999999, |
|
"llama-3.3-70b-instruct": 0.664, |
|
"mistral-large-instruct-2411": 0.884, |
|
"gemma-2-27b-it": 0.44399999999999995, |
|
"gemma-2-9b-it": 0.13999999999999999, |
|
"deepseek-v3": 0.984, |
|
"deepseek-r1": 0.9119999999999999, |
|
"qwq-32b": 0.968, |
|
"Average": 0.6184 |
|
}, |
|
"MarineEnv": { |
|
"qwen2.5-3b-instruct": 0.144, |
|
"qwen2.5-7b-instruct": 0.384, |
|
"qwen2.5-14b-instruct": 0.8720000000000001, |
|
"qwen2.5-32b-instruct": 0.844, |
|
"qwen2.5-72b-instruct": 0.8320000000000001, |
|
"llama-3.1-8b-instruct": 0.308, |
|
"llama-3.1-70b-instruct": 0.636, |
|
"llama-3.2-3b-instruct": 0.12000000000000002, |
|
"llama-3.3-70b-instruct": 0.704, |
|
"mistral-large-instruct-2411": 0.7879999999999999, |
|
"gemma-2-27b-it": 0.484, |
|
"gemma-2-9b-it": 0.23199999999999998, |
|
"deepseek-v3": 0.884, |
|
"deepseek-r1": 0.9, |
|
"qwq-32b": 0.8880000000000001, |
|
"Average": 0.6013333333333334 |
|
}, |
|
"PhilosophyEnv": { |
|
"qwen2.5-3b-instruct": 0.144, |
|
"qwen2.5-7b-instruct": 0.3, |
|
"qwen2.5-14b-instruct": 0.7280000000000001, |
|
"qwen2.5-32b-instruct": 0.82, |
|
"qwen2.5-72b-instruct": 0.8719999999999999, |
|
"llama-3.1-8b-instruct": 0.32799999999999996, |
|
"llama-3.1-70b-instruct": 0.764, |
|
"llama-3.2-3b-instruct": 0.036000000000000004, |
|
"llama-3.3-70b-instruct": 0.796, |
|
"mistral-large-instruct-2411": 0.7879999999999999, |
|
"gemma-2-27b-it": 0.372, |
|
"gemma-2-9b-it": 0.28, |
|
"deepseek-v3": 0.844, |
|
"deepseek-r1": 0.78, |
|
"qwq-32b": 0.8320000000000001, |
|
"Average": 0.5789333333333334 |
|
}, |
|
"ArchaeologicalEnv": { |
|
"qwen2.5-3b-instruct": 0.18, |
|
"qwen2.5-7b-instruct": 0.38, |
|
"qwen2.5-14b-instruct": 0.58, |
|
"qwen2.5-32b-instruct": 0.608, |
|
"qwen2.5-72b-instruct": 0.5640000000000001, |
|
"llama-3.1-8b-instruct": 0.26, |
|
"llama-3.1-70b-instruct": 0.608, |
|
"llama-3.2-3b-instruct": 0.192, |
|
"llama-3.3-70b-instruct": 0.548, |
|
"mistral-large-instruct-2411": 0.64, |
|
"gemma-2-27b-it": 0.476, |
|
"gemma-2-9b-it": 0.30000000000000004, |
|
"deepseek-v3": 0.916, |
|
"deepseek-r1": 0.7040000000000001, |
|
"qwq-32b": 0.7559999999999999, |
|
"Average": 0.5141333333333333 |
|
}, |
|
"GemstoneEnv": { |
|
"qwen2.5-3b-instruct": 0.192, |
|
"qwen2.5-7b-instruct": 0.264, |
|
"qwen2.5-14b-instruct": 0.492, |
|
"qwen2.5-32b-instruct": 0.45599999999999996, |
|
"qwen2.5-72b-instruct": 0.44000000000000006, |
|
"llama-3.1-8b-instruct": 0.192, |
|
"llama-3.1-70b-instruct": 0.40800000000000003, |
|
"llama-3.2-3b-instruct": 0.15200000000000002, |
|
"llama-3.3-70b-instruct": 0.45599999999999996, |
|
"mistral-large-instruct-2411": 0.528, |
|
"gemma-2-27b-it": 0.33999999999999997, |
|
"gemma-2-9b-it": 0.256, |
|
"deepseek-v3": 0.5680000000000001, |
|
"deepseek-r1": 0.5680000000000001, |
|
"qwq-32b": 0.636, |
|
"Average": 0.3965333333333333 |
|
}, |
|
"MicrobiologyEnv": { |
|
"qwen2.5-3b-instruct": 0.14400000000000002, |
|
"qwen2.5-7b-instruct": 0.38400000000000006, |
|
"qwen2.5-14b-instruct": 0.752, |
|
"qwen2.5-32b-instruct": 0.7, |
|
"qwen2.5-72b-instruct": 0.844, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.512, |
|
"llama-3.2-3b-instruct": 0.12000000000000002, |
|
"llama-3.3-70b-instruct": 0.496, |
|
"mistral-large-instruct-2411": 0.764, |
|
"gemma-2-27b-it": 0.504, |
|
"gemma-2-9b-it": 0.172, |
|
"deepseek-v3": 0.9279999999999999, |
|
"deepseek-r1": 0.952, |
|
"qwq-32b": 0.932, |
|
"Average": 0.568 |
|
}, |
|
"SciFiEnv": { |
|
"qwen2.5-3b-instruct": 0.192, |
|
"qwen2.5-7b-instruct": 0.384, |
|
"qwen2.5-14b-instruct": 0.7879999999999999, |
|
"qwen2.5-32b-instruct": 0.776, |
|
"qwen2.5-72b-instruct": 0.7879999999999999, |
|
"llama-3.1-8b-instruct": 0.35200000000000004, |
|
"llama-3.1-70b-instruct": 0.664, |
|
"llama-3.2-3b-instruct": 0.164, |
|
"llama-3.3-70b-instruct": 0.588, |
|
"mistral-large-instruct-2411": 0.736, |
|
"gemma-2-27b-it": 0.52, |
|
"gemma-2-9b-it": 0.33599999999999997, |
|
"deepseek-v3": 0.9279999999999999, |
|
"deepseek-r1": 0.9199999999999999, |
|
"qwq-32b": 0.9, |
|
"Average": 0.6023999999999999 |
|
}, |
|
"HormoneEnv": { |
|
"qwen2.5-3b-instruct": 0.152, |
|
"qwen2.5-7b-instruct": 0.40800000000000003, |
|
"qwen2.5-14b-instruct": 0.7999999999999999, |
|
"qwen2.5-32b-instruct": 0.784, |
|
"qwen2.5-72b-instruct": 0.764, |
|
"llama-3.1-8b-instruct": 0.336, |
|
"llama-3.1-70b-instruct": 0.76, |
|
"llama-3.2-3b-instruct": 0.184, |
|
"llama-3.3-70b-instruct": 0.8480000000000001, |
|
"mistral-large-instruct-2411": 0.8, |
|
"gemma-2-27b-it": 0.524, |
|
"gemma-2-9b-it": 0.312, |
|
"deepseek-v3": 0.9480000000000001, |
|
"deepseek-r1": 0.944, |
|
"qwq-32b": 0.852, |
|
"Average": 0.6277333333333334 |
|
}, |
|
"SculptorEnv": { |
|
"qwen2.5-3b-instruct": 0.23200000000000004, |
|
"qwen2.5-7b-instruct": 0.4159999999999999, |
|
"qwen2.5-14b-instruct": 0.7079999999999999, |
|
"qwen2.5-32b-instruct": 0.636, |
|
"qwen2.5-72b-instruct": 0.6, |
|
"llama-3.1-8b-instruct": 0.22799999999999998, |
|
"llama-3.1-70b-instruct": 0.484, |
|
"llama-3.2-3b-instruct": 0.188, |
|
"llama-3.3-70b-instruct": 0.532, |
|
"mistral-large-instruct-2411": 0.684, |
|
"gemma-2-27b-it": 0.30000000000000004, |
|
"gemma-2-9b-it": 0.156, |
|
"deepseek-v3": 0.788, |
|
"deepseek-r1": 0.7479999999999999, |
|
"qwq-32b": 0.8119999999999999, |
|
"Average": 0.5008 |
|
}, |
|
"NeuroEnv": { |
|
"qwen2.5-3b-instruct": 0.10800000000000001, |
|
"qwen2.5-7b-instruct": 0.24400000000000005, |
|
"qwen2.5-14b-instruct": 0.8960000000000001, |
|
"qwen2.5-32b-instruct": 0.892, |
|
"qwen2.5-72b-instruct": 0.8879999999999999, |
|
"llama-3.1-8b-instruct": 0.512, |
|
"llama-3.1-70b-instruct": 0.8880000000000001, |
|
"llama-3.2-3b-instruct": 0.20400000000000001, |
|
"llama-3.3-70b-instruct": 0.9279999999999999, |
|
"mistral-large-instruct-2411": 0.8880000000000001, |
|
"gemma-2-27b-it": 0.72, |
|
"gemma-2-9b-it": 0.42800000000000005, |
|
"deepseek-v3": 0.952, |
|
"deepseek-r1": 0.932, |
|
"qwq-32b": 0.852, |
|
"Average": 0.6888000000000001 |
|
}, |
|
"OceanEnv": { |
|
"qwen2.5-3b-instruct": 0.2, |
|
"qwen2.5-7b-instruct": 0.45999999999999996, |
|
"qwen2.5-14b-instruct": 0.6160000000000001, |
|
"qwen2.5-32b-instruct": 0.6000000000000001, |
|
"qwen2.5-72b-instruct": 0.62, |
|
"llama-3.1-8b-instruct": 0.36400000000000005, |
|
"llama-3.1-70b-instruct": 0.5680000000000001, |
|
"llama-3.2-3b-instruct": 0.156, |
|
"llama-3.3-70b-instruct": 0.476, |
|
"mistral-large-instruct-2411": 0.656, |
|
"gemma-2-27b-it": 0.43200000000000005, |
|
"gemma-2-9b-it": 0.248, |
|
"deepseek-v3": 0.852, |
|
"deepseek-r1": 0.836, |
|
"qwq-32b": 0.8240000000000001, |
|
"Average": 0.5272000000000001 |
|
}, |
|
"MineralEnv": { |
|
"qwen2.5-3b-instruct": 0.14400000000000002, |
|
"qwen2.5-7b-instruct": 0.38, |
|
"qwen2.5-14b-instruct": 0.768, |
|
"qwen2.5-32b-instruct": 0.6960000000000001, |
|
"qwen2.5-72b-instruct": 0.684, |
|
"llama-3.1-8b-instruct": 0.29600000000000004, |
|
"llama-3.1-70b-instruct": 0.556, |
|
"llama-3.2-3b-instruct": 0.16, |
|
"llama-3.3-70b-instruct": 0.56, |
|
"mistral-large-instruct-2411": 0.66, |
|
"gemma-2-27b-it": 0.384, |
|
"gemma-2-9b-it": 0.17200000000000001, |
|
"deepseek-v3": 0.8480000000000001, |
|
"deepseek-r1": 0.82, |
|
"qwq-32b": 0.8720000000000001, |
|
"Average": 0.5333333333333333 |
|
}, |
|
"FishEnv": { |
|
"qwen2.5-3b-instruct": 0.188, |
|
"qwen2.5-7b-instruct": 0.38, |
|
"qwen2.5-14b-instruct": 0.732, |
|
"qwen2.5-32b-instruct": 0.668, |
|
"qwen2.5-72b-instruct": 0.7200000000000001, |
|
"llama-3.1-8b-instruct": 0.392, |
|
"llama-3.1-70b-instruct": 0.624, |
|
"llama-3.2-3b-instruct": 0.13599999999999998, |
|
"llama-3.3-70b-instruct": 0.616, |
|
"mistral-large-instruct-2411": 0.736, |
|
"gemma-2-27b-it": 0.508, |
|
"gemma-2-9b-it": 0.268, |
|
"deepseek-v3": 0.86, |
|
"deepseek-r1": 0.868, |
|
"qwq-32b": 0.924, |
|
"Average": 0.5746666666666667 |
|
}, |
|
"MartialArtsEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.43200000000000005, |
|
"qwen2.5-14b-instruct": 0.672, |
|
"qwen2.5-32b-instruct": 0.5640000000000001, |
|
"qwen2.5-72b-instruct": 0.56, |
|
"llama-3.1-8b-instruct": 0.276, |
|
"llama-3.1-70b-instruct": 0.54, |
|
"llama-3.2-3b-instruct": 0.2, |
|
"llama-3.3-70b-instruct": 0.52, |
|
"mistral-large-instruct-2411": 0.568, |
|
"gemma-2-27b-it": 0.4, |
|
"gemma-2-9b-it": 0.22400000000000003, |
|
"deepseek-v3": 0.784, |
|
"deepseek-r1": 0.716, |
|
"qwq-32b": 0.752, |
|
"Average": 0.4928 |
|
}, |
|
"RocketFuelEnv": { |
|
"qwen2.5-3b-instruct": 0.22800000000000004, |
|
"qwen2.5-7b-instruct": 0.41600000000000004, |
|
"qwen2.5-14b-instruct": 0.852, |
|
"qwen2.5-32b-instruct": 0.7879999999999999, |
|
"qwen2.5-72b-instruct": 0.8160000000000001, |
|
"llama-3.1-8b-instruct": 0.36, |
|
"llama-3.1-70b-instruct": 0.6799999999999999, |
|
"llama-3.2-3b-instruct": 0.184, |
|
"llama-3.3-70b-instruct": 0.7239999999999999, |
|
"mistral-large-instruct-2411": 0.828, |
|
"gemma-2-27b-it": 0.6279999999999999, |
|
"gemma-2-9b-it": 0.248, |
|
"deepseek-v3": 0.916, |
|
"deepseek-r1": 0.8960000000000001, |
|
"qwq-32b": 0.9040000000000001, |
|
"Average": 0.6312000000000001 |
|
}, |
|
"MLEnv": { |
|
"qwen2.5-3b-instruct": 0.088, |
|
"qwen2.5-7b-instruct": 0.392, |
|
"qwen2.5-14b-instruct": 0.6, |
|
"qwen2.5-32b-instruct": 0.748, |
|
"qwen2.5-72b-instruct": 0.792, |
|
"llama-3.1-8b-instruct": 0.304, |
|
"llama-3.1-70b-instruct": 0.672, |
|
"llama-3.2-3b-instruct": 0.10799999999999998, |
|
"llama-3.3-70b-instruct": 0.5960000000000001, |
|
"mistral-large-instruct-2411": 0.7639999999999999, |
|
"gemma-2-27b-it": 0.264, |
|
"gemma-2-9b-it": 0.156, |
|
"deepseek-v3": 0.808, |
|
"deepseek-r1": 0.652, |
|
"qwq-32b": 0.772, |
|
"Average": 0.5144 |
|
}, |
|
"PoliticalManifestoEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.312, |
|
"qwen2.5-14b-instruct": 0.76, |
|
"qwen2.5-32b-instruct": 0.852, |
|
"qwen2.5-72b-instruct": 0.7839999999999999, |
|
"llama-3.1-8b-instruct": 0.42400000000000004, |
|
"llama-3.1-70b-instruct": 0.62, |
|
"llama-3.2-3b-instruct": 0.128, |
|
"llama-3.3-70b-instruct": 0.692, |
|
"mistral-large-instruct-2411": 0.796, |
|
"gemma-2-27b-it": 0.45200000000000007, |
|
"gemma-2-9b-it": 0.152, |
|
"deepseek-v3": 0.86, |
|
"deepseek-r1": 0.792, |
|
"qwq-32b": 0.8800000000000001, |
|
"Average": 0.5792 |
|
}, |
|
"CoffeeEnv": { |
|
"qwen2.5-3b-instruct": 0.20400000000000001, |
|
"qwen2.5-7b-instruct": 0.38, |
|
"qwen2.5-14b-instruct": 0.7799999999999999, |
|
"qwen2.5-32b-instruct": 0.8039999999999999, |
|
"qwen2.5-72b-instruct": 0.764, |
|
"llama-3.1-8b-instruct": 0.31599999999999995, |
|
"llama-3.1-70b-instruct": 0.552, |
|
"llama-3.2-3b-instruct": 0.17200000000000001, |
|
"llama-3.3-70b-instruct": 0.6599999999999999, |
|
"mistral-large-instruct-2411": 0.828, |
|
"gemma-2-27b-it": 0.592, |
|
"gemma-2-9b-it": 0.364, |
|
"deepseek-v3": 0.9120000000000001, |
|
"deepseek-r1": 0.9279999999999999, |
|
"qwq-32b": 0.9359999999999999, |
|
"Average": 0.6128 |
|
}, |
|
"MotifAnalysisEnv": { |
|
"qwen2.5-3b-instruct": 0.096, |
|
"qwen2.5-7b-instruct": 0.332, |
|
"qwen2.5-14b-instruct": 0.5680000000000001, |
|
"qwen2.5-32b-instruct": 0.496, |
|
"qwen2.5-72b-instruct": 0.5920000000000001, |
|
"llama-3.1-8b-instruct": 0.244, |
|
"llama-3.1-70b-instruct": 0.36000000000000004, |
|
"llama-3.2-3b-instruct": 0.13999999999999999, |
|
"llama-3.3-70b-instruct": 0.22400000000000003, |
|
"mistral-large-instruct-2411": 0.46399999999999997, |
|
"gemma-2-27b-it": 0.18, |
|
"gemma-2-9b-it": 0.128, |
|
"deepseek-v3": 0.752, |
|
"deepseek-r1": 0.8240000000000001, |
|
"qwq-32b": 0.8640000000000001, |
|
"Average": 0.4176 |
|
}, |
|
"NutritionEnv": { |
|
"qwen2.5-3b-instruct": 0.132, |
|
"qwen2.5-7b-instruct": 0.22000000000000003, |
|
"qwen2.5-14b-instruct": 0.7920000000000001, |
|
"qwen2.5-32b-instruct": 0.8400000000000001, |
|
"qwen2.5-72b-instruct": 0.876, |
|
"llama-3.1-8b-instruct": 0.264, |
|
"llama-3.1-70b-instruct": 0.64, |
|
"llama-3.2-3b-instruct": 0.128, |
|
"llama-3.3-70b-instruct": 0.7040000000000001, |
|
"mistral-large-instruct-2411": 0.8320000000000001, |
|
"gemma-2-27b-it": 0.38, |
|
"gemma-2-9b-it": 0.20800000000000002, |
|
"deepseek-v3": 0.944, |
|
"deepseek-r1": 0.944, |
|
"qwq-32b": 0.9120000000000001, |
|
"Average": 0.5877333333333333 |
|
}, |
|
"MalwareEnv": { |
|
"qwen2.5-3b-instruct": 0.16, |
|
"qwen2.5-7b-instruct": 0.316, |
|
"qwen2.5-14b-instruct": 0.728, |
|
"qwen2.5-32b-instruct": 0.756, |
|
"qwen2.5-72b-instruct": 0.7200000000000001, |
|
"llama-3.1-8b-instruct": 0.268, |
|
"llama-3.1-70b-instruct": 0.5840000000000001, |
|
"llama-3.2-3b-instruct": 0.10800000000000001, |
|
"llama-3.3-70b-instruct": 0.548, |
|
"mistral-large-instruct-2411": 0.752, |
|
"gemma-2-27b-it": 0.252, |
|
"gemma-2-9b-it": 0.12, |
|
"deepseek-v3": 0.916, |
|
"deepseek-r1": 0.9, |
|
"qwq-32b": 0.916, |
|
"Average": 0.5362666666666667 |
|
}, |
|
"GeologicalEnv": { |
|
"qwen2.5-3b-instruct": 0.132, |
|
"qwen2.5-7b-instruct": 0.336, |
|
"qwen2.5-14b-instruct": 0.7639999999999999, |
|
"qwen2.5-32b-instruct": 0.748, |
|
"qwen2.5-72b-instruct": 0.676, |
|
"llama-3.1-8b-instruct": 0.28800000000000003, |
|
"llama-3.1-70b-instruct": 0.552, |
|
"llama-3.2-3b-instruct": 0.13999999999999999, |
|
"llama-3.3-70b-instruct": 0.508, |
|
"mistral-large-instruct-2411": 0.812, |
|
"gemma-2-27b-it": 0.41600000000000004, |
|
"gemma-2-9b-it": 0.164, |
|
"deepseek-v3": 0.9119999999999999, |
|
"deepseek-r1": 0.8480000000000001, |
|
"qwq-32b": 0.8880000000000001, |
|
"Average": 0.5456000000000001 |
|
}, |
|
"TheatricalEnv": { |
|
"qwen2.5-3b-instruct": 0.14400000000000002, |
|
"qwen2.5-7b-instruct": 0.42400000000000004, |
|
"qwen2.5-14b-instruct": 0.676, |
|
"qwen2.5-32b-instruct": 0.78, |
|
"qwen2.5-72b-instruct": 0.808, |
|
"llama-3.1-8b-instruct": 0.41200000000000003, |
|
"llama-3.1-70b-instruct": 0.7959999999999999, |
|
"llama-3.2-3b-instruct": 0.1, |
|
"llama-3.3-70b-instruct": 0.768, |
|
"mistral-large-instruct-2411": 0.844, |
|
"gemma-2-27b-it": 0.528, |
|
"gemma-2-9b-it": 0.28, |
|
"deepseek-v3": 0.884, |
|
"deepseek-r1": 0.8240000000000001, |
|
"qwq-32b": 0.908, |
|
"Average": 0.6117333333333335 |
|
}, |
|
"PrintingTechniqueEnv": { |
|
"qwen2.5-3b-instruct": 0.144, |
|
"qwen2.5-7b-instruct": 0.252, |
|
"qwen2.5-14b-instruct": 0.736, |
|
"qwen2.5-32b-instruct": 0.7200000000000001, |
|
"qwen2.5-72b-instruct": 0.776, |
|
"llama-3.1-8b-instruct": 0.4, |
|
"llama-3.1-70b-instruct": 0.54, |
|
"llama-3.2-3b-instruct": 0.16, |
|
"llama-3.3-70b-instruct": 0.548, |
|
"mistral-large-instruct-2411": 0.7040000000000001, |
|
"gemma-2-27b-it": 0.44000000000000006, |
|
"gemma-2-9b-it": 0.192, |
|
"deepseek-v3": 0.916, |
|
"deepseek-r1": 0.852, |
|
"qwq-32b": 0.9279999999999999, |
|
"Average": 0.5538666666666666 |
|
}, |
|
"StellarEnv": { |
|
"qwen2.5-3b-instruct": 0.132, |
|
"qwen2.5-7b-instruct": 0.388, |
|
"qwen2.5-14b-instruct": 0.6759999999999999, |
|
"qwen2.5-32b-instruct": 0.724, |
|
"qwen2.5-72b-instruct": 0.6960000000000001, |
|
"llama-3.1-8b-instruct": 0.30000000000000004, |
|
"llama-3.1-70b-instruct": 0.6040000000000001, |
|
"llama-3.2-3b-instruct": 0.16, |
|
"llama-3.3-70b-instruct": 0.6240000000000001, |
|
"mistral-large-instruct-2411": 0.732, |
|
"gemma-2-27b-it": 0.364, |
|
"gemma-2-9b-it": 0.23199999999999998, |
|
"deepseek-v3": 0.82, |
|
"deepseek-r1": 0.648, |
|
"qwq-32b": 0.776, |
|
"Average": 0.5250666666666667 |
|
}, |
|
"SoilEnv": { |
|
"qwen2.5-3b-instruct": 0.172, |
|
"qwen2.5-7b-instruct": 0.48, |
|
"qwen2.5-14b-instruct": 0.8320000000000001, |
|
"qwen2.5-32b-instruct": 0.788, |
|
"qwen2.5-72b-instruct": 0.8240000000000001, |
|
"llama-3.1-8b-instruct": 0.42400000000000004, |
|
"llama-3.1-70b-instruct": 0.64, |
|
"llama-3.2-3b-instruct": 0.22799999999999998, |
|
"llama-3.3-70b-instruct": 0.664, |
|
"mistral-large-instruct-2411": 0.76, |
|
"gemma-2-27b-it": 0.628, |
|
"gemma-2-9b-it": 0.44000000000000006, |
|
"deepseek-v3": 0.884, |
|
"deepseek-r1": 0.8039999999999999, |
|
"qwq-32b": 0.8480000000000001, |
|
"Average": 0.6277333333333334 |
|
}, |
|
"SoftwareEnv": { |
|
"qwen2.5-3b-instruct": 0.14800000000000002, |
|
"qwen2.5-7b-instruct": 0.40800000000000003, |
|
"qwen2.5-14b-instruct": 0.744, |
|
"qwen2.5-32b-instruct": 0.86, |
|
"qwen2.5-72b-instruct": 0.8400000000000001, |
|
"llama-3.1-8b-instruct": 0.4159999999999999, |
|
"llama-3.1-70b-instruct": 0.72, |
|
"llama-3.2-3b-instruct": 0.16799999999999998, |
|
"llama-3.3-70b-instruct": 0.784, |
|
"mistral-large-instruct-2411": 0.804, |
|
"gemma-2-27b-it": 0.528, |
|
"gemma-2-9b-it": 0.308, |
|
"deepseek-v3": 0.836, |
|
"deepseek-r1": 0.8360000000000001, |
|
"qwq-32b": 0.8800000000000001, |
|
"Average": 0.6186666666666667 |
|
}, |
|
"CarIdentificationEnv": { |
|
"qwen2.5-3b-instruct": 0.272, |
|
"qwen2.5-7b-instruct": 0.4, |
|
"qwen2.5-14b-instruct": 0.9120000000000001, |
|
"qwen2.5-32b-instruct": 0.916, |
|
"qwen2.5-72b-instruct": 0.9359999999999999, |
|
"llama-3.1-8b-instruct": 0.544, |
|
"llama-3.1-70b-instruct": 0.8400000000000001, |
|
"llama-3.2-3b-instruct": 0.124, |
|
"llama-3.3-70b-instruct": 0.852, |
|
"mistral-large-instruct-2411": 0.9119999999999999, |
|
"gemma-2-27b-it": 0.672, |
|
"gemma-2-9b-it": 0.376, |
|
"deepseek-v3": 0.992, |
|
"deepseek-r1": 0.952, |
|
"qwq-32b": 0.9879999999999999, |
|
"Average": 0.7125333333333334 |
|
}, |
|
"PharmaceuticalEnv": { |
|
"qwen2.5-3b-instruct": 0.156, |
|
"qwen2.5-7b-instruct": 0.32, |
|
"qwen2.5-14b-instruct": 0.7600000000000001, |
|
"qwen2.5-32b-instruct": 0.752, |
|
"qwen2.5-72b-instruct": 0.7559999999999999, |
|
"llama-3.1-8b-instruct": 0.28400000000000003, |
|
"llama-3.1-70b-instruct": 0.508, |
|
"llama-3.2-3b-instruct": 0.148, |
|
"llama-3.3-70b-instruct": 0.472, |
|
"mistral-large-instruct-2411": 0.756, |
|
"gemma-2-27b-it": 0.336, |
|
"gemma-2-9b-it": 0.128, |
|
"deepseek-v3": 0.8800000000000001, |
|
"deepseek-r1": 0.8640000000000001, |
|
"qwq-32b": 0.8, |
|
"Average": 0.528 |
|
}, |
|
"NetworkEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.36, |
|
"qwen2.5-14b-instruct": 0.66, |
|
"qwen2.5-32b-instruct": 0.716, |
|
"qwen2.5-72b-instruct": 0.716, |
|
"llama-3.1-8b-instruct": 0.43199999999999994, |
|
"llama-3.1-70b-instruct": 0.68, |
|
"llama-3.2-3b-instruct": 0.14400000000000002, |
|
"llama-3.3-70b-instruct": 0.7040000000000001, |
|
"mistral-large-instruct-2411": 0.78, |
|
"gemma-2-27b-it": 0.492, |
|
"gemma-2-9b-it": 0.392, |
|
"deepseek-v3": 0.8400000000000001, |
|
"deepseek-r1": 0.736, |
|
"qwq-32b": 0.828, |
|
"Average": 0.5776 |
|
}, |
|
"BirdNestEnv": { |
|
"qwen2.5-3b-instruct": 0.148, |
|
"qwen2.5-7b-instruct": 0.21200000000000002, |
|
"qwen2.5-14b-instruct": 0.48, |
|
"qwen2.5-32b-instruct": 0.33999999999999997, |
|
"qwen2.5-72b-instruct": 0.42400000000000004, |
|
"llama-3.1-8b-instruct": 0.16799999999999998, |
|
"llama-3.1-70b-instruct": 0.22400000000000003, |
|
"llama-3.2-3b-instruct": 0.084, |
|
"llama-3.3-70b-instruct": 0.20800000000000002, |
|
"mistral-large-instruct-2411": 0.492, |
|
"gemma-2-27b-it": 0.176, |
|
"gemma-2-9b-it": 0.128, |
|
"deepseek-v3": 0.764, |
|
"deepseek-r1": 0.756, |
|
"qwq-32b": 0.8119999999999999, |
|
"Average": 0.36106666666666676 |
|
}, |
|
"EnergyEnv": { |
|
"qwen2.5-3b-instruct": 0.15999999999999998, |
|
"qwen2.5-7b-instruct": 0.42000000000000004, |
|
"qwen2.5-14b-instruct": 0.7999999999999999, |
|
"qwen2.5-32b-instruct": 0.7, |
|
"qwen2.5-72b-instruct": 0.5880000000000001, |
|
"llama-3.1-8b-instruct": 0.29600000000000004, |
|
"llama-3.1-70b-instruct": 0.46799999999999997, |
|
"llama-3.2-3b-instruct": 0.18, |
|
"llama-3.3-70b-instruct": 0.396, |
|
"mistral-large-instruct-2411": 0.78, |
|
"gemma-2-27b-it": 0.35200000000000004, |
|
"gemma-2-9b-it": 0.196, |
|
"deepseek-v3": 0.916, |
|
"deepseek-r1": 0.8720000000000001, |
|
"qwq-32b": 0.8880000000000001, |
|
"Average": 0.5341333333333333 |
|
}, |
|
"LanguageEnv": { |
|
"qwen2.5-3b-instruct": 0.196, |
|
"qwen2.5-7b-instruct": 0.304, |
|
"qwen2.5-14b-instruct": 0.388, |
|
"qwen2.5-32b-instruct": 0.512, |
|
"qwen2.5-72b-instruct": 0.5599999999999999, |
|
"llama-3.1-8b-instruct": 0.23200000000000004, |
|
"llama-3.1-70b-instruct": 0.40800000000000003, |
|
"llama-3.2-3b-instruct": 0.144, |
|
"llama-3.3-70b-instruct": 0.336, |
|
"mistral-large-instruct-2411": 0.536, |
|
"gemma-2-27b-it": 0.20800000000000002, |
|
"gemma-2-9b-it": 0.172, |
|
"deepseek-v3": 0.724, |
|
"deepseek-r1": 0.716, |
|
"qwq-32b": 0.8119999999999999, |
|
"Average": 0.41653333333333337 |
|
}, |
|
"AlgorithmEnv": { |
|
"qwen2.5-3b-instruct": 0.1, |
|
"qwen2.5-7b-instruct": 0.28400000000000003, |
|
"qwen2.5-14b-instruct": 0.688, |
|
"qwen2.5-32b-instruct": 0.6960000000000001, |
|
"qwen2.5-72b-instruct": 0.66, |
|
"llama-3.1-8b-instruct": 0.35200000000000004, |
|
"llama-3.1-70b-instruct": 0.512, |
|
"llama-3.2-3b-instruct": 0.22399999999999998, |
|
"llama-3.3-70b-instruct": 0.484, |
|
"mistral-large-instruct-2411": 0.788, |
|
"gemma-2-27b-it": 0.268, |
|
"gemma-2-9b-it": 0.164, |
|
"deepseek-v3": 0.792, |
|
"deepseek-r1": 0.724, |
|
"qwq-32b": 0.812, |
|
"Average": 0.5032 |
|
}, |
|
"MathematicalEnv": { |
|
"qwen2.5-3b-instruct": 0.048, |
|
"qwen2.5-7b-instruct": 0.42800000000000005, |
|
"qwen2.5-14b-instruct": 0.7000000000000001, |
|
"qwen2.5-32b-instruct": 0.8119999999999999, |
|
"qwen2.5-72b-instruct": 0.792, |
|
"llama-3.1-8b-instruct": 0.316, |
|
"llama-3.1-70b-instruct": 0.8, |
|
"llama-3.2-3b-instruct": 0.12800000000000003, |
|
"llama-3.3-70b-instruct": 0.8400000000000001, |
|
"mistral-large-instruct-2411": 0.884, |
|
"gemma-2-27b-it": 0.268, |
|
"gemma-2-9b-it": 0.068, |
|
"deepseek-v3": 0.9119999999999999, |
|
"deepseek-r1": 0.876, |
|
"qwq-32b": 0.8160000000000001, |
|
"Average": 0.5792 |
|
}, |
|
"MusicalEnv": { |
|
"qwen2.5-3b-instruct": 0.04, |
|
"qwen2.5-7b-instruct": 0.336, |
|
"qwen2.5-14b-instruct": 0.8039999999999999, |
|
"qwen2.5-32b-instruct": 0.8560000000000001, |
|
"qwen2.5-72b-instruct": 0.8400000000000001, |
|
"llama-3.1-8b-instruct": 0.34400000000000003, |
|
"llama-3.1-70b-instruct": 0.68, |
|
"llama-3.2-3b-instruct": 0.088, |
|
"llama-3.3-70b-instruct": 0.8240000000000001, |
|
"mistral-large-instruct-2411": 0.884, |
|
"gemma-2-27b-it": 0.28, |
|
"gemma-2-9b-it": 0.11599999999999999, |
|
"deepseek-v3": 0.9480000000000001, |
|
"deepseek-r1": 0.892, |
|
"qwq-32b": 0.9039999999999999, |
|
"Average": 0.5890666666666668 |
|
}, |
|
"InventorEnv": { |
|
"qwen2.5-3b-instruct": 0.14800000000000002, |
|
"qwen2.5-7b-instruct": 0.43200000000000005, |
|
"qwen2.5-14b-instruct": 0.776, |
|
"qwen2.5-32b-instruct": 0.7999999999999999, |
|
"qwen2.5-72b-instruct": 0.772, |
|
"llama-3.1-8b-instruct": 0.4, |
|
"llama-3.1-70b-instruct": 0.7, |
|
"llama-3.2-3b-instruct": 0.188, |
|
"llama-3.3-70b-instruct": 0.616, |
|
"mistral-large-instruct-2411": 0.8039999999999999, |
|
"gemma-2-27b-it": 0.552, |
|
"gemma-2-9b-it": 0.364, |
|
"deepseek-v3": 0.9399999999999998, |
|
"deepseek-r1": 0.908, |
|
"qwq-32b": 0.9, |
|
"Average": 0.62 |
|
}, |
|
"MedicalEnv": { |
|
"qwen2.5-3b-instruct": 0.22000000000000003, |
|
"qwen2.5-7b-instruct": 0.544, |
|
"qwen2.5-14b-instruct": 0.8320000000000001, |
|
"qwen2.5-32b-instruct": 0.8800000000000001, |
|
"qwen2.5-72b-instruct": 0.8960000000000001, |
|
"llama-3.1-8b-instruct": 0.52, |
|
"llama-3.1-70b-instruct": 0.82, |
|
"llama-3.2-3b-instruct": 0.23200000000000004, |
|
"llama-3.3-70b-instruct": 0.8960000000000001, |
|
"mistral-large-instruct-2411": 0.8960000000000001, |
|
"gemma-2-27b-it": 0.692, |
|
"gemma-2-9b-it": 0.5760000000000001, |
|
"deepseek-v3": 0.9039999999999999, |
|
"deepseek-r1": 0.9359999999999999, |
|
"qwq-32b": 0.9199999999999999, |
|
"Average": 0.7175999999999999 |
|
}, |
|
"MusicEnv": { |
|
"qwen2.5-3b-instruct": 0.184, |
|
"qwen2.5-7b-instruct": 0.26, |
|
"qwen2.5-14b-instruct": 0.656, |
|
"qwen2.5-32b-instruct": 0.664, |
|
"qwen2.5-72b-instruct": 0.7559999999999999, |
|
"llama-3.1-8b-instruct": 0.356, |
|
"llama-3.1-70b-instruct": 0.596, |
|
"llama-3.2-3b-instruct": 0.10800000000000001, |
|
"llama-3.3-70b-instruct": 0.596, |
|
"mistral-large-instruct-2411": 0.6639999999999999, |
|
"gemma-2-27b-it": 0.45600000000000007, |
|
"gemma-2-9b-it": 0.28400000000000003, |
|
"deepseek-v3": 0.8119999999999999, |
|
"deepseek-r1": 0.868, |
|
"qwq-32b": 0.868, |
|
"Average": 0.5418666666666667 |
|
}, |
|
"FantasyEnv": { |
|
"qwen2.5-3b-instruct": 0.148, |
|
"qwen2.5-7b-instruct": 0.32, |
|
"qwen2.5-14b-instruct": 0.74, |
|
"qwen2.5-32b-instruct": 0.7879999999999999, |
|
"qwen2.5-72b-instruct": 0.5720000000000001, |
|
"llama-3.1-8b-instruct": 0.40800000000000003, |
|
"llama-3.1-70b-instruct": 0.676, |
|
"llama-3.2-3b-instruct": 0.152, |
|
"llama-3.3-70b-instruct": 0.704, |
|
"mistral-large-instruct-2411": 0.8240000000000001, |
|
"gemma-2-27b-it": 0.524, |
|
"gemma-2-9b-it": 0.324, |
|
"deepseek-v3": 0.9199999999999999, |
|
"deepseek-r1": 0.9719999999999999, |
|
"qwq-32b": 0.9719999999999999, |
|
"Average": 0.6029333333333332 |
|
}, |
|
"EducationEnv": { |
|
"qwen2.5-3b-instruct": 0.10400000000000001, |
|
"qwen2.5-7b-instruct": 0.268, |
|
"qwen2.5-14b-instruct": 0.828, |
|
"qwen2.5-32b-instruct": 0.9039999999999999, |
|
"qwen2.5-72b-instruct": 0.8480000000000001, |
|
"llama-3.1-8b-instruct": 0.5680000000000001, |
|
"llama-3.1-70b-instruct": 0.768, |
|
"llama-3.2-3b-instruct": 0.192, |
|
"llama-3.3-70b-instruct": 0.9039999999999999, |
|
"mistral-large-instruct-2411": 0.876, |
|
"gemma-2-27b-it": 0.624, |
|
"gemma-2-9b-it": 0.45999999999999996, |
|
"deepseek-v3": 0.9480000000000001, |
|
"deepseek-r1": 0.9, |
|
"qwq-32b": 0.9359999999999999, |
|
"Average": 0.6752 |
|
}, |
|
"ChemicalEnv": { |
|
"qwen2.5-3b-instruct": 0.264, |
|
"qwen2.5-7b-instruct": 0.44000000000000006, |
|
"qwen2.5-14b-instruct": 0.724, |
|
"qwen2.5-32b-instruct": 0.7040000000000001, |
|
"qwen2.5-72b-instruct": 0.72, |
|
"llama-3.1-8b-instruct": 0.36, |
|
"llama-3.1-70b-instruct": 0.62, |
|
"llama-3.2-3b-instruct": 0.16399999999999998, |
|
"llama-3.3-70b-instruct": 0.45999999999999996, |
|
"mistral-large-instruct-2411": 0.68, |
|
"gemma-2-27b-it": 0.44399999999999995, |
|
"gemma-2-9b-it": 0.316, |
|
"deepseek-v3": 0.8799999999999999, |
|
"deepseek-r1": 0.6799999999999999, |
|
"qwq-32b": 0.8200000000000001, |
|
"Average": 0.5517333333333333 |
|
}, |
|
"Average": { |
|
"qwen2.5-3b-instruct": 0.1655841584158416, |
|
"qwen2.5-7b-instruct": 0.34736633663366323, |
|
"qwen2.5-14b-instruct": 0.7148514851485149, |
|
"qwen2.5-32b-instruct": 0.7330693069306928, |
|
"qwen2.5-72b-instruct": 0.7272079207920793, |
|
"llama-3.1-8b-instruct": 0.3334653465346535, |
|
"llama-3.1-70b-instruct": 0.6271287128712871, |
|
"llama-3.2-3b-instruct": 0.15599999999999997, |
|
"llama-3.3-70b-instruct": 0.6372277227722771, |
|
"mistral-large-instruct-2411": 0.7573861386138615, |
|
"gemma-2-27b-it": 0.44522772277227735, |
|
"gemma-2-9b-it": 0.264, |
|
"deepseek-v3": 0.8605148514851484, |
|
"deepseek-r1": 0.8304554455445546, |
|
"qwq-32b": 0.8630891089108911 |
|
} |
|
} |