{ "RelicEnv": { "qwen2.5-3b-instruct": 0.18, "qwen2.5-7b-instruct": 0.396, "qwen2.5-14b-instruct": 0.8, "qwen2.5-32b-instruct": 0.8560000000000001, "qwen2.5-72b-instruct": 0.892, "llama-3.1-8b-instruct": 0.21600000000000003, "llama-3.1-70b-instruct": 0.6639999999999999, "llama-3.2-3b-instruct": 0.164, "llama-3.3-70b-instruct": 0.836, "mistral-large-instruct-2411": 0.8560000000000001, "gemma-2-27b-it": 0.544, "gemma-2-9b-it": 0.36400000000000005, "deepseek-v3": 0.9359999999999999, "deepseek-r1": 0.916, "qwq-32b": 0.9560000000000001, "Average": 0.6384 }, "HerbEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.304, "qwen2.5-14b-instruct": 0.784, "qwen2.5-32b-instruct": 0.8400000000000001, "qwen2.5-72b-instruct": 0.8039999999999999, "llama-3.1-8b-instruct": 0.30000000000000004, "llama-3.1-70b-instruct": 0.568, "llama-3.2-3b-instruct": 0.128, "llama-3.3-70b-instruct": 0.612, "mistral-large-instruct-2411": 0.76, "gemma-2-27b-it": 0.504, "gemma-2-9b-it": 0.18000000000000002, "deepseek-v3": 0.968, "deepseek-r1": 0.9359999999999999, "qwq-32b": 0.924, "Average": 0.5863999999999999 }, "TransdimensionalEnv": { "qwen2.5-3b-instruct": 0.156, "qwen2.5-7b-instruct": 0.38400000000000006, "qwen2.5-14b-instruct": 0.836, "qwen2.5-32b-instruct": 0.876, "qwen2.5-72b-instruct": 0.836, "llama-3.1-8b-instruct": 0.44399999999999995, "llama-3.1-70b-instruct": 0.828, "llama-3.2-3b-instruct": 0.172, "llama-3.3-70b-instruct": 0.86, "mistral-large-instruct-2411": 0.86, "gemma-2-27b-it": 0.5599999999999999, "gemma-2-9b-it": 0.516, "deepseek-v3": 0.968, "deepseek-r1": 0.9359999999999999, "qwq-32b": 0.968, "Average": 0.6799999999999999 }, "SorcererEnv": { "qwen2.5-3b-instruct": 0.16, "qwen2.5-7b-instruct": 0.32400000000000007, "qwen2.5-14b-instruct": 0.8039999999999999, "qwen2.5-32b-instruct": 0.8240000000000001, "qwen2.5-72b-instruct": 0.8320000000000001, "llama-3.1-8b-instruct": 0.276, "llama-3.1-70b-instruct": 0.6639999999999999, "llama-3.2-3b-instruct": 0.196, "llama-3.3-70b-instruct": 0.7360000000000001, "mistral-large-instruct-2411": 0.8, "gemma-2-27b-it": 0.5640000000000001, "gemma-2-9b-it": 0.28800000000000003, "deepseek-v3": 0.8640000000000001, "deepseek-r1": 0.8240000000000001, "qwq-32b": 0.8400000000000001, "Average": 0.5997333333333333 }, "QuantumEnv": { "qwen2.5-3b-instruct": 0.196, "qwen2.5-7b-instruct": 0.532, "qwen2.5-14b-instruct": 0.8720000000000001, "qwen2.5-32b-instruct": 0.9039999999999999, "qwen2.5-72b-instruct": 0.916, "llama-3.1-8b-instruct": 0.45600000000000007, "llama-3.1-70b-instruct": 0.7999999999999999, "llama-3.2-3b-instruct": 0.168, "llama-3.3-70b-instruct": 0.8480000000000001, "mistral-large-instruct-2411": 0.8720000000000001, "gemma-2-27b-it": 0.744, "gemma-2-9b-it": 0.544, "deepseek-v3": 0.884, "deepseek-r1": 0.8640000000000001, "qwq-32b": 0.868, "Average": 0.6978666666666666 }, "AstronomyEnv": { "qwen2.5-3b-instruct": 0.172, "qwen2.5-7b-instruct": 0.42800000000000005, "qwen2.5-14b-instruct": 0.716, "qwen2.5-32b-instruct": 0.676, "qwen2.5-72b-instruct": 0.748, "llama-3.1-8b-instruct": 0.336, "llama-3.1-70b-instruct": 0.692, "llama-3.2-3b-instruct": 0.176, "llama-3.3-70b-instruct": 0.6519999999999999, "mistral-large-instruct-2411": 0.7999999999999999, "gemma-2-27b-it": 0.508, "gemma-2-9b-it": 0.372, "deepseek-v3": 0.748, "deepseek-r1": 0.8200000000000001, "qwq-32b": 0.852, "Average": 0.5797333333333333 }, "MusicGenresEnv": { "qwen2.5-3b-instruct": 0.22000000000000003, "qwen2.5-7b-instruct": 0.42000000000000004, "qwen2.5-14b-instruct": 0.72, "qwen2.5-32b-instruct": 0.716, "qwen2.5-72b-instruct": 0.696, "llama-3.1-8b-instruct": 0.35200000000000004, "llama-3.1-70b-instruct": 0.6280000000000001, "llama-3.2-3b-instruct": 0.136, "llama-3.3-70b-instruct": 0.592, "mistral-large-instruct-2411": 0.732, "gemma-2-27b-it": 0.44800000000000006, "gemma-2-9b-it": 0.332, "deepseek-v3": 0.748, "deepseek-r1": 0.792, "qwq-32b": 0.876, "Average": 0.5605333333333334 }, "CloudEnv": { "qwen2.5-3b-instruct": 0.21199999999999997, "qwen2.5-7b-instruct": 0.42000000000000004, "qwen2.5-14b-instruct": 0.76, "qwen2.5-32b-instruct": 0.656, "qwen2.5-72b-instruct": 0.712, "llama-3.1-8b-instruct": 0.42000000000000004, "llama-3.1-70b-instruct": 0.664, "llama-3.2-3b-instruct": 0.22800000000000004, "llama-3.3-70b-instruct": 0.696, "mistral-large-instruct-2411": 0.8360000000000001, "gemma-2-27b-it": 0.6, "gemma-2-9b-it": 0.4, "deepseek-v3": 0.8200000000000001, "deepseek-r1": 0.908, "qwq-32b": 0.9120000000000001, "Average": 0.6162666666666667 }, "CuisineEnv": { "qwen2.5-3b-instruct": 0.21600000000000003, "qwen2.5-7b-instruct": 0.316, "qwen2.5-14b-instruct": 0.6960000000000001, "qwen2.5-32b-instruct": 0.664, "qwen2.5-72b-instruct": 0.656, "llama-3.1-8b-instruct": 0.22799999999999998, "llama-3.1-70b-instruct": 0.476, "llama-3.2-3b-instruct": 0.152, "llama-3.3-70b-instruct": 0.44400000000000006, "mistral-large-instruct-2411": 0.644, "gemma-2-27b-it": 0.27599999999999997, "gemma-2-9b-it": 0.156, "deepseek-v3": 0.8400000000000001, "deepseek-r1": 0.7959999999999999, "qwq-32b": 0.8800000000000001, "Average": 0.49599999999999994 }, "PlantEnv": { "qwen2.5-3b-instruct": 0.168, "qwen2.5-7b-instruct": 0.236, "qwen2.5-14b-instruct": 0.34, "qwen2.5-32b-instruct": 0.22000000000000003, "qwen2.5-72b-instruct": 0.22799999999999998, "llama-3.1-8b-instruct": 0.148, "llama-3.1-70b-instruct": 0.16, "llama-3.2-3b-instruct": 0.084, "llama-3.3-70b-instruct": 0.07599999999999998, "mistral-large-instruct-2411": 0.264, "gemma-2-27b-it": 0.14400000000000002, "gemma-2-9b-it": 0.092, "deepseek-v3": 0.512, "deepseek-r1": 0.5, "qwq-32b": 0.548, "Average": 0.24800000000000003 }, "HistoricalEnv": { "qwen2.5-3b-instruct": 0.24, "qwen2.5-7b-instruct": 0.368, "qwen2.5-14b-instruct": 0.5800000000000001, "qwen2.5-32b-instruct": 0.476, "qwen2.5-72b-instruct": 0.512, "llama-3.1-8b-instruct": 0.332, "llama-3.1-70b-instruct": 0.616, "llama-3.2-3b-instruct": 0.2, "llama-3.3-70b-instruct": 0.652, "mistral-large-instruct-2411": 0.6880000000000001, "gemma-2-27b-it": 0.5, "gemma-2-9b-it": 0.376, "deepseek-v3": 0.748, "deepseek-r1": 0.828, "qwq-32b": 0.884, "Average": 0.5333333333333334 }, "GadgetEnv": { "qwen2.5-3b-instruct": 0.124, "qwen2.5-7b-instruct": 0.312, "qwen2.5-14b-instruct": 0.852, "qwen2.5-32b-instruct": 0.8640000000000001, "qwen2.5-72b-instruct": 0.892, "llama-3.1-8b-instruct": 0.284, "llama-3.1-70b-instruct": 0.692, "llama-3.2-3b-instruct": 0.11200000000000002, "llama-3.3-70b-instruct": 0.7360000000000001, "mistral-large-instruct-2411": 0.884, "gemma-2-27b-it": 0.32799999999999996, "gemma-2-9b-it": 0.184, "deepseek-v3": 0.9640000000000001, "deepseek-r1": 0.932, "qwq-32b": 0.932, "Average": 0.6061333333333334 }, "TimeTravelEnv": { "qwen2.5-3b-instruct": 0.128, "qwen2.5-7b-instruct": 0.292, "qwen2.5-14b-instruct": 0.808, "qwen2.5-32b-instruct": 0.828, "qwen2.5-72b-instruct": 0.8039999999999999, "llama-3.1-8b-instruct": 0.376, "llama-3.1-70b-instruct": 0.684, "llama-3.2-3b-instruct": 0.124, "llama-3.3-70b-instruct": 0.716, "mistral-large-instruct-2411": 0.884, "gemma-2-27b-it": 0.32799999999999996, "gemma-2-9b-it": 0.21600000000000003, "deepseek-v3": 0.9399999999999998, "deepseek-r1": 0.932, "qwq-32b": 0.924, "Average": 0.5989333333333333 }, "PollutionEnv": { "qwen2.5-3b-instruct": 0.136, "qwen2.5-7b-instruct": 0.328, "qwen2.5-14b-instruct": 0.792, "qwen2.5-32b-instruct": 0.7120000000000001, "qwen2.5-72b-instruct": 0.704, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.664, "llama-3.2-3b-instruct": 0.124, "llama-3.3-70b-instruct": 0.6960000000000001, "mistral-large-instruct-2411": 0.784, "gemma-2-27b-it": 0.336, "gemma-2-9b-it": 0.252, "deepseek-v3": 0.8640000000000001, "deepseek-r1": 0.8560000000000001, "qwq-32b": 0.852, "Average": 0.5610666666666666 }, "DemographicEnv": { "qwen2.5-3b-instruct": 0.072, "qwen2.5-7b-instruct": 0.42800000000000005, "qwen2.5-14b-instruct": 0.68, "qwen2.5-32b-instruct": 0.7799999999999999, "qwen2.5-72b-instruct": 0.7719999999999999, "llama-3.1-8b-instruct": 0.272, "llama-3.1-70b-instruct": 0.6239999999999999, "llama-3.2-3b-instruct": 0.176, "llama-3.3-70b-instruct": 0.748, "mistral-large-instruct-2411": 0.8200000000000001, "gemma-2-27b-it": 0.356, "gemma-2-9b-it": 0.156, "deepseek-v3": 0.8960000000000001, "deepseek-r1": 0.876, "qwq-32b": 0.8960000000000001, "Average": 0.5701333333333333 }, "GeneticEnv": { "qwen2.5-3b-instruct": 0.084, "qwen2.5-7b-instruct": 0.392, "qwen2.5-14b-instruct": 0.884, "qwen2.5-32b-instruct": 0.9279999999999999, "qwen2.5-72b-instruct": 0.9400000000000001, "llama-3.1-8b-instruct": 0.45999999999999996, "llama-3.1-70b-instruct": 0.9, "llama-3.2-3b-instruct": 0.192, "llama-3.3-70b-instruct": 0.916, "mistral-large-instruct-2411": 0.9040000000000001, "gemma-2-27b-it": 0.776, "gemma-2-9b-it": 0.548, "deepseek-v3": 0.984, "deepseek-r1": 0.952, "qwq-32b": 0.932, "Average": 0.7194666666666667 }, "CraftsmanEnv": { "qwen2.5-3b-instruct": 0.14400000000000002, "qwen2.5-7b-instruct": 0.256, "qwen2.5-14b-instruct": 0.624, "qwen2.5-32b-instruct": 0.736, "qwen2.5-72b-instruct": 0.664, "llama-3.1-8b-instruct": 0.22000000000000003, "llama-3.1-70b-instruct": 0.524, "llama-3.2-3b-instruct": 0.10800000000000001, "llama-3.3-70b-instruct": 0.41600000000000004, "mistral-large-instruct-2411": 0.7080000000000001, "gemma-2-27b-it": 0.324, "gemma-2-9b-it": 0.096, "deepseek-v3": 0.9, "deepseek-r1": 0.7879999999999999, "qwq-32b": 0.8160000000000001, "Average": 0.4882666666666667 }, "StarConstellationEnv": { "qwen2.5-3b-instruct": 0.1, "qwen2.5-7b-instruct": 0.332, "qwen2.5-14b-instruct": 0.5960000000000001, "qwen2.5-32b-instruct": 0.572, "qwen2.5-72b-instruct": 0.5840000000000001, "llama-3.1-8b-instruct": 0.376, "llama-3.1-70b-instruct": 0.4640000000000001, "llama-3.2-3b-instruct": 0.136, "llama-3.3-70b-instruct": 0.41200000000000003, "mistral-large-instruct-2411": 0.6120000000000001, "gemma-2-27b-it": 0.472, "gemma-2-9b-it": 0.22799999999999998, "deepseek-v3": 0.744, "deepseek-r1": 0.748, "qwq-32b": 0.736, "Average": 0.47413333333333335 }, "MythicalCreatureEnv": { "qwen2.5-3b-instruct": 0.2, "qwen2.5-7b-instruct": 0.324, "qwen2.5-14b-instruct": 0.632, "qwen2.5-32b-instruct": 0.712, "qwen2.5-72b-instruct": 0.668, "llama-3.1-8b-instruct": 0.31200000000000006, "llama-3.1-70b-instruct": 0.62, "llama-3.2-3b-instruct": 0.11200000000000002, "llama-3.3-70b-instruct": 0.648, "mistral-large-instruct-2411": 0.7480000000000001, "gemma-2-27b-it": 0.42799999999999994, "gemma-2-9b-it": 0.268, "deepseek-v3": 0.8400000000000001, "deepseek-r1": 0.8400000000000001, "qwq-32b": 0.852, "Average": 0.5469333333333333 }, "ArtStyleEnv": { "qwen2.5-3b-instruct": 0.136, "qwen2.5-7b-instruct": 0.332, "qwen2.5-14b-instruct": 0.78, "qwen2.5-32b-instruct": 0.8320000000000001, "qwen2.5-72b-instruct": 0.748, "llama-3.1-8b-instruct": 0.356, "llama-3.1-70b-instruct": 0.616, "llama-3.2-3b-instruct": 0.17200000000000001, "llama-3.3-70b-instruct": 0.6199999999999999, "mistral-large-instruct-2411": 0.828, "gemma-2-27b-it": 0.43200000000000005, "gemma-2-9b-it": 0.256, "deepseek-v3": 0.876, "deepseek-r1": 0.8200000000000001, "qwq-32b": 0.868, "Average": 0.5781333333333335 }, "CookingEnv": { "qwen2.5-3b-instruct": 0.13999999999999999, "qwen2.5-7b-instruct": 0.44799999999999995, "qwen2.5-14b-instruct": 0.76, "qwen2.5-32b-instruct": 0.7440000000000001, "qwen2.5-72b-instruct": 0.7, "llama-3.1-8b-instruct": 0.364, "llama-3.1-70b-instruct": 0.6839999999999999, "llama-3.2-3b-instruct": 0.156, "llama-3.3-70b-instruct": 0.656, "mistral-large-instruct-2411": 0.74, "gemma-2-27b-it": 0.48, "gemma-2-9b-it": 0.364, "deepseek-v3": 0.8640000000000001, "deepseek-r1": 0.812, "qwq-32b": 0.9, "Average": 0.5874666666666666 }, "HistoricalBattleEnv": { "qwen2.5-3b-instruct": 0.256, "qwen2.5-7b-instruct": 0.292, "qwen2.5-14b-instruct": 0.45999999999999996, "qwen2.5-32b-instruct": 0.476, "qwen2.5-72b-instruct": 0.42400000000000004, "llama-3.1-8b-instruct": 0.28400000000000003, "llama-3.1-70b-instruct": 0.492, "llama-3.2-3b-instruct": 0.148, "llama-3.3-70b-instruct": 0.62, "mistral-large-instruct-2411": 0.608, "gemma-2-27b-it": 0.388, "gemma-2-9b-it": 0.34, "deepseek-v3": 0.724, "deepseek-r1": 0.788, "qwq-32b": 0.8560000000000001, "Average": 0.47706666666666664 }, "FungalEnv": { "qwen2.5-3b-instruct": 0.15999999999999998, "qwen2.5-7b-instruct": 0.46399999999999997, "qwen2.5-14b-instruct": 0.664, "qwen2.5-32b-instruct": 0.728, "qwen2.5-72b-instruct": 0.6839999999999999, "llama-3.1-8b-instruct": 0.41600000000000004, "llama-3.1-70b-instruct": 0.5840000000000001, "llama-3.2-3b-instruct": 0.14, "llama-3.3-70b-instruct": 0.644, "mistral-large-instruct-2411": 0.7440000000000001, "gemma-2-27b-it": 0.536, "gemma-2-9b-it": 0.184, "deepseek-v3": 0.844, "deepseek-r1": 0.764, "qwq-32b": 0.7879999999999999, "Average": 0.5562666666666666 }, "CryptographyEnv": { "qwen2.5-3b-instruct": 0.24000000000000005, "qwen2.5-7b-instruct": 0.23199999999999998, "qwen2.5-14b-instruct": 0.508, "qwen2.5-32b-instruct": 0.5760000000000001, "qwen2.5-72b-instruct": 0.528, "llama-3.1-8b-instruct": 0.29600000000000004, "llama-3.1-70b-instruct": 0.524, "llama-3.2-3b-instruct": 0.11600000000000002, "llama-3.3-70b-instruct": 0.512, "mistral-large-instruct-2411": 0.6799999999999999, "gemma-2-27b-it": 0.328, "gemma-2-9b-it": 0.192, "deepseek-v3": 0.784, "deepseek-r1": 0.74, "qwq-32b": 0.8480000000000001, "Average": 0.4736 }, "StorageEnv": { "qwen2.5-3b-instruct": 0.22800000000000004, "qwen2.5-7b-instruct": 0.44000000000000006, "qwen2.5-14b-instruct": 0.852, "qwen2.5-32b-instruct": 0.884, "qwen2.5-72b-instruct": 0.8119999999999999, "llama-3.1-8b-instruct": 0.34800000000000003, "llama-3.1-70b-instruct": 0.724, "llama-3.2-3b-instruct": 0.21600000000000003, "llama-3.3-70b-instruct": 0.796, "mistral-large-instruct-2411": 0.8880000000000001, "gemma-2-27b-it": 0.596, "gemma-2-9b-it": 0.392, "deepseek-v3": 0.9640000000000001, "deepseek-r1": 0.9119999999999999, "qwq-32b": 0.944, "Average": 0.6663999999999999 }, "RoverEnv": { "qwen2.5-3b-instruct": 0.14400000000000002, "qwen2.5-7b-instruct": 0.236, "qwen2.5-14b-instruct": 0.8480000000000001, "qwen2.5-32b-instruct": 0.8360000000000001, "qwen2.5-72b-instruct": 0.796, "llama-3.1-8b-instruct": 0.28400000000000003, "llama-3.1-70b-instruct": 0.612, "llama-3.2-3b-instruct": 0.148, "llama-3.3-70b-instruct": 0.724, "mistral-large-instruct-2411": 0.828, "gemma-2-27b-it": 0.4600000000000001, "gemma-2-9b-it": 0.072, "deepseek-v3": 0.9200000000000002, "deepseek-r1": 0.9, "qwq-32b": 0.8720000000000001, "Average": 0.5786666666666668 }, "FashionEnv": { "qwen2.5-3b-instruct": 0.17200000000000001, "qwen2.5-7b-instruct": 0.304, "qwen2.5-14b-instruct": 0.8240000000000001, "qwen2.5-32b-instruct": 0.808, "qwen2.5-72b-instruct": 0.768, "llama-3.1-8b-instruct": 0.32, "llama-3.1-70b-instruct": 0.6, "llama-3.2-3b-instruct": 0.16399999999999998, "llama-3.3-70b-instruct": 0.6160000000000001, "mistral-large-instruct-2411": 0.756, "gemma-2-27b-it": 0.524, "gemma-2-9b-it": 0.292, "deepseek-v3": 0.86, "deepseek-r1": 0.756, "qwq-32b": 0.86, "Average": 0.5749333333333334 }, "LicenseEnv": { "qwen2.5-3b-instruct": 0.196, "qwen2.5-7b-instruct": 0.29200000000000004, "qwen2.5-14b-instruct": 0.556, "qwen2.5-32b-instruct": 0.44000000000000006, "qwen2.5-72b-instruct": 0.484, "llama-3.1-8b-instruct": 0.26, "llama-3.1-70b-instruct": 0.496, "llama-3.2-3b-instruct": 0.072, "llama-3.3-70b-instruct": 0.45999999999999996, "mistral-large-instruct-2411": 0.504, "gemma-2-27b-it": 0.37600000000000006, "gemma-2-9b-it": 0.296, "deepseek-v3": 0.556, "deepseek-r1": 0.52, "qwq-32b": 0.5800000000000001, "Average": 0.4058666666666667 }, "VirusClassificationEnv": { "qwen2.5-3b-instruct": 0.22000000000000003, "qwen2.5-7b-instruct": 0.28, "qwen2.5-14b-instruct": 0.384, "qwen2.5-32b-instruct": 0.38, "qwen2.5-72b-instruct": 0.42800000000000005, "llama-3.1-8b-instruct": 0.256, "llama-3.1-70b-instruct": 0.332, "llama-3.2-3b-instruct": 0.156, "llama-3.3-70b-instruct": 0.396, "mistral-large-instruct-2411": 0.532, "gemma-2-27b-it": 0.34, "gemma-2-9b-it": 0.31200000000000006, "deepseek-v3": 0.536, "deepseek-r1": 0.64, "qwq-32b": 0.684, "Average": 0.3917333333333333 }, "TestingEnv": { "qwen2.5-3b-instruct": 0.19200000000000003, "qwen2.5-7b-instruct": 0.22000000000000003, "qwen2.5-14b-instruct": 0.608, "qwen2.5-32b-instruct": 0.648, "qwen2.5-72b-instruct": 0.708, "llama-3.1-8b-instruct": 0.332, "llama-3.1-70b-instruct": 0.68, "llama-3.2-3b-instruct": 0.17200000000000001, "llama-3.3-70b-instruct": 0.7040000000000001, "mistral-large-instruct-2411": 0.764, "gemma-2-27b-it": 0.22799999999999998, "gemma-2-9b-it": 0.26, "deepseek-v3": 0.8880000000000001, "deepseek-r1": 0.764, "qwq-32b": 0.7999999999999999, "Average": 0.5312 }, "NarrativeDetectEnv": { "qwen2.5-3b-instruct": 0.148, "qwen2.5-7b-instruct": 0.30000000000000004, "qwen2.5-14b-instruct": 0.552, "qwen2.5-32b-instruct": 0.8440000000000001, "qwen2.5-72b-instruct": 0.76, "llama-3.1-8b-instruct": 0.28800000000000003, "llama-3.1-70b-instruct": 0.6279999999999999, "llama-3.2-3b-instruct": 0.10400000000000001, "llama-3.3-70b-instruct": 0.704, "mistral-large-instruct-2411": 0.7919999999999999, "gemma-2-27b-it": 0.328, "gemma-2-9b-it": 0.192, "deepseek-v3": 0.8560000000000001, "deepseek-r1": 0.748, "qwq-32b": 0.784, "Average": 0.5352 }, "RenewableEnergyEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.44399999999999995, "qwen2.5-14b-instruct": 0.648, "qwen2.5-32b-instruct": 0.932, "qwen2.5-72b-instruct": 0.8880000000000001, "llama-3.1-8b-instruct": 0.396, "llama-3.1-70b-instruct": 0.812, "llama-3.2-3b-instruct": 0.2, "llama-3.3-70b-instruct": 0.8240000000000001, "mistral-large-instruct-2411": 0.8560000000000001, "gemma-2-27b-it": 0.348, "gemma-2-9b-it": 0.188, "deepseek-v3": 0.96, "deepseek-r1": 0.9800000000000001, "qwq-32b": 0.9800000000000001, "Average": 0.6426666666666667 }, "CelestialEnv": { "qwen2.5-3b-instruct": 0.20400000000000001, "qwen2.5-7b-instruct": 0.252, "qwen2.5-14b-instruct": 0.728, "qwen2.5-32b-instruct": 0.792, "qwen2.5-72b-instruct": 0.7239999999999999, "llama-3.1-8b-instruct": 0.256, "llama-3.1-70b-instruct": 0.6920000000000001, "llama-3.2-3b-instruct": 0.192, "llama-3.3-70b-instruct": 0.744, "mistral-large-instruct-2411": 0.82, "gemma-2-27b-it": 0.528, "gemma-2-9b-it": 0.344, "deepseek-v3": 0.8480000000000001, "deepseek-r1": 0.8360000000000001, "qwq-32b": 0.8879999999999999, "Average": 0.5898666666666668 }, "SpiceEnv": { "qwen2.5-3b-instruct": 0.21199999999999997, "qwen2.5-7b-instruct": 0.332, "qwen2.5-14b-instruct": 0.672, "qwen2.5-32b-instruct": 0.476, "qwen2.5-72b-instruct": 0.5880000000000001, "llama-3.1-8b-instruct": 0.32799999999999996, "llama-3.1-70b-instruct": 0.40800000000000003, "llama-3.2-3b-instruct": 0.22000000000000003, "llama-3.3-70b-instruct": 0.336, "mistral-large-instruct-2411": 0.5800000000000001, "gemma-2-27b-it": 0.28400000000000003, "gemma-2-9b-it": 0.172, "deepseek-v3": 0.908, "deepseek-r1": 0.7679999999999999, "qwq-32b": 0.8720000000000001, "Average": 0.47706666666666664 }, "WildlifeEnv": { "qwen2.5-3b-instruct": 0.21600000000000003, "qwen2.5-7b-instruct": 0.352, "qwen2.5-14b-instruct": 0.644, "qwen2.5-32b-instruct": 0.592, "qwen2.5-72b-instruct": 0.616, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.544, "llama-3.2-3b-instruct": 0.23199999999999998, "llama-3.3-70b-instruct": 0.616, "mistral-large-instruct-2411": 0.628, "gemma-2-27b-it": 0.45199999999999996, "gemma-2-9b-it": 0.344, "deepseek-v3": 0.736, "deepseek-r1": 0.6040000000000001, "qwq-32b": 0.716, "Average": 0.5072 }, "VehicleEnv": { "qwen2.5-3b-instruct": 0.172, "qwen2.5-7b-instruct": 0.308, "qwen2.5-14b-instruct": 0.54, "qwen2.5-32b-instruct": 0.776, "qwen2.5-72b-instruct": 0.78, "llama-3.1-8b-instruct": 0.248, "llama-3.1-70b-instruct": 0.62, "llama-3.2-3b-instruct": 0.152, "llama-3.3-70b-instruct": 0.6960000000000001, "mistral-large-instruct-2411": 0.8800000000000001, "gemma-2-27b-it": 0.44799999999999995, "gemma-2-9b-it": 0.248, "deepseek-v3": 0.9199999999999999, "deepseek-r1": 0.9199999999999999, "qwq-32b": 0.916, "Average": 0.5749333333333333 }, "BeverageEnv": { "qwen2.5-3b-instruct": 0.128, "qwen2.5-7b-instruct": 0.296, "qwen2.5-14b-instruct": 0.792, "qwen2.5-32b-instruct": 0.6880000000000001, "qwen2.5-72b-instruct": 0.724, "llama-3.1-8b-instruct": 0.41200000000000003, "llama-3.1-70b-instruct": 0.6199999999999999, "llama-3.2-3b-instruct": 0.16399999999999998, "llama-3.3-70b-instruct": 0.5800000000000001, "mistral-large-instruct-2411": 0.748, "gemma-2-27b-it": 0.40800000000000003, "gemma-2-9b-it": 0.296, "deepseek-v3": 0.8800000000000001, "deepseek-r1": 0.7520000000000001, "qwq-32b": 0.844, "Average": 0.5554666666666667 }, "ControlEnv": { "qwen2.5-3b-instruct": 0.12800000000000003, "qwen2.5-7b-instruct": 0.364, "qwen2.5-14b-instruct": 0.68, "qwen2.5-32b-instruct": 0.8320000000000001, "qwen2.5-72b-instruct": 0.8400000000000001, "llama-3.1-8b-instruct": 0.364, "llama-3.1-70b-instruct": 0.656, "llama-3.2-3b-instruct": 0.15599999999999997, "llama-3.3-70b-instruct": 0.6320000000000001, "mistral-large-instruct-2411": 0.784, "gemma-2-27b-it": 0.4640000000000001, "gemma-2-9b-it": 0.18, "deepseek-v3": 0.9119999999999999, "deepseek-r1": 0.9119999999999999, "qwq-32b": 0.932, "Average": 0.5890666666666665 }, "CurrencyEnv": { "qwen2.5-3b-instruct": 0.252, "qwen2.5-7b-instruct": 0.392, "qwen2.5-14b-instruct": 0.8560000000000001, "qwen2.5-32b-instruct": 0.884, "qwen2.5-72b-instruct": 0.836, "llama-3.1-8b-instruct": 0.476, "llama-3.1-70b-instruct": 0.7520000000000001, "llama-3.2-3b-instruct": 0.22400000000000003, "llama-3.3-70b-instruct": 0.7000000000000001, "mistral-large-instruct-2411": 0.8960000000000001, "gemma-2-27b-it": 0.68, "gemma-2-9b-it": 0.196, "deepseek-v3": 0.9800000000000001, "deepseek-r1": 0.932, "qwq-32b": 0.9640000000000001, "Average": 0.668 }, "MarketingEnv": { "qwen2.5-3b-instruct": 0.12, "qwen2.5-7b-instruct": 0.34400000000000003, "qwen2.5-14b-instruct": 0.524, "qwen2.5-32b-instruct": 0.7479999999999999, "qwen2.5-72b-instruct": 0.732, "llama-3.1-8b-instruct": 0.30800000000000005, "llama-3.1-70b-instruct": 0.7040000000000001, "llama-3.2-3b-instruct": 0.14400000000000002, "llama-3.3-70b-instruct": 0.7639999999999999, "mistral-large-instruct-2411": 0.7600000000000001, "gemma-2-27b-it": 0.32400000000000007, "gemma-2-9b-it": 0.184, "deepseek-v3": 0.812, "deepseek-r1": 0.7959999999999999, "qwq-32b": 0.8320000000000001, "Average": 0.5397333333333333 }, "BotanicalEnv": { "qwen2.5-3b-instruct": 0.18800000000000003, "qwen2.5-7b-instruct": 0.316, "qwen2.5-14b-instruct": 0.9119999999999999, "qwen2.5-32b-instruct": 0.884, "qwen2.5-72b-instruct": 0.9039999999999999, "llama-3.1-8b-instruct": 0.4119999999999999, "llama-3.1-70b-instruct": 0.836, "llama-3.2-3b-instruct": 0.23600000000000004, "llama-3.3-70b-instruct": 0.8480000000000001, "mistral-large-instruct-2411": 0.8640000000000001, "gemma-2-27b-it": 0.604, "gemma-2-9b-it": 0.264, "deepseek-v3": 0.9040000000000001, "deepseek-r1": 0.9399999999999998, "qwq-32b": 0.968, "Average": 0.672 }, "CircusActEnv": { "qwen2.5-3b-instruct": 0.17200000000000001, "qwen2.5-7b-instruct": 0.32399999999999995, "qwen2.5-14b-instruct": 0.64, "qwen2.5-32b-instruct": 0.712, "qwen2.5-72b-instruct": 0.768, "llama-3.1-8b-instruct": 0.276, "llama-3.1-70b-instruct": 0.648, "llama-3.2-3b-instruct": 0.176, "llama-3.3-70b-instruct": 0.62, "mistral-large-instruct-2411": 0.748, "gemma-2-27b-it": 0.384, "gemma-2-9b-it": 0.29600000000000004, "deepseek-v3": 0.8640000000000001, "deepseek-r1": 0.82, "qwq-32b": 0.8720000000000001, "Average": 0.5546666666666668 }, "AudioDialectEnv": { "qwen2.5-3b-instruct": 0.128, "qwen2.5-7b-instruct": 0.312, "qwen2.5-14b-instruct": 0.5800000000000001, "qwen2.5-32b-instruct": 0.6, "qwen2.5-72b-instruct": 0.528, "llama-3.1-8b-instruct": 0.21600000000000003, "llama-3.1-70b-instruct": 0.4, "llama-3.2-3b-instruct": 0.132, "llama-3.3-70b-instruct": 0.32399999999999995, "mistral-large-instruct-2411": 0.68, "gemma-2-27b-it": 0.28, "gemma-2-9b-it": 0.11600000000000002, "deepseek-v3": 0.7520000000000001, "deepseek-r1": 0.7919999999999999, "qwq-32b": 0.8119999999999999, "Average": 0.4434666666666666 }, "LeadershipEnv": { "qwen2.5-3b-instruct": 0.164, "qwen2.5-7b-instruct": 0.372, "qwen2.5-14b-instruct": 0.7, "qwen2.5-32b-instruct": 0.732, "qwen2.5-72b-instruct": 0.7639999999999999, "llama-3.1-8b-instruct": 0.364, "llama-3.1-70b-instruct": 0.708, "llama-3.2-3b-instruct": 0.128, "llama-3.3-70b-instruct": 0.6920000000000001, "mistral-large-instruct-2411": 0.728, "gemma-2-27b-it": 0.46799999999999997, "gemma-2-9b-it": 0.20400000000000001, "deepseek-v3": 0.8200000000000001, "deepseek-r1": 0.748, "qwq-32b": 0.828, "Average": 0.5613333333333334 }, "TransportEnv": { "qwen2.5-3b-instruct": 0.196, "qwen2.5-7b-instruct": 0.372, "qwen2.5-14b-instruct": 0.716, "qwen2.5-32b-instruct": 0.732, "qwen2.5-72b-instruct": 0.8, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.648, "llama-3.2-3b-instruct": 0.15200000000000002, "llama-3.3-70b-instruct": 0.6000000000000001, "mistral-large-instruct-2411": 0.7879999999999999, "gemma-2-27b-it": 0.44399999999999995, "gemma-2-9b-it": 0.364, "deepseek-v3": 0.8640000000000001, "deepseek-r1": 0.8240000000000001, "qwq-32b": 0.9199999999999999, "Average": 0.5824 }, "EcologicalEnv": { "qwen2.5-3b-instruct": 0.152, "qwen2.5-7b-instruct": 0.45600000000000007, "qwen2.5-14b-instruct": 0.748, "qwen2.5-32b-instruct": 0.82, "qwen2.5-72b-instruct": 0.792, "llama-3.1-8b-instruct": 0.42000000000000004, "llama-3.1-70b-instruct": 0.692, "llama-3.2-3b-instruct": 0.21600000000000003, "llama-3.3-70b-instruct": 0.64, "mistral-large-instruct-2411": 0.772, "gemma-2-27b-it": 0.5680000000000001, "gemma-2-9b-it": 0.46799999999999997, "deepseek-v3": 0.868, "deepseek-r1": 0.8720000000000001, "qwq-32b": 0.8879999999999999, "Average": 0.6248 }, "MythicEnv": { "qwen2.5-3b-instruct": 0.132, "qwen2.5-7b-instruct": 0.36, "qwen2.5-14b-instruct": 0.744, "qwen2.5-32b-instruct": 0.74, "qwen2.5-72b-instruct": 0.672, "llama-3.1-8b-instruct": 0.236, "llama-3.1-70b-instruct": 0.596, "llama-3.2-3b-instruct": 0.12, "llama-3.3-70b-instruct": 0.576, "mistral-large-instruct-2411": 0.6960000000000001, "gemma-2-27b-it": 0.45599999999999996, "gemma-2-9b-it": 0.136, "deepseek-v3": 0.8960000000000001, "deepseek-r1": 0.8720000000000001, "qwq-32b": 0.8400000000000001, "Average": 0.5381333333333332 }, "EnzymeEnv": { "qwen2.5-3b-instruct": 0.252, "qwen2.5-7b-instruct": 0.43200000000000005, "qwen2.5-14b-instruct": 0.636, "qwen2.5-32b-instruct": 0.676, "qwen2.5-72b-instruct": 0.676, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.552, "llama-3.2-3b-instruct": 0.192, "llama-3.3-70b-instruct": 0.5640000000000001, "mistral-large-instruct-2411": 0.732, "gemma-2-27b-it": 0.43600000000000005, "gemma-2-9b-it": 0.264, "deepseek-v3": 0.8400000000000001, "deepseek-r1": 0.76, "qwq-32b": 0.804, "Average": 0.5421333333333334 }, "OSKernelEnv": { "qwen2.5-3b-instruct": 0.192, "qwen2.5-7b-instruct": 0.28400000000000003, "qwen2.5-14b-instruct": 0.8119999999999999, "qwen2.5-32b-instruct": 0.784, "qwen2.5-72b-instruct": 0.788, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.6920000000000001, "llama-3.2-3b-instruct": 0.128, "llama-3.3-70b-instruct": 0.74, "mistral-large-instruct-2411": 0.8559999999999999, "gemma-2-27b-it": 0.46399999999999997, "gemma-2-9b-it": 0.2, "deepseek-v3": 0.9480000000000001, "deepseek-r1": 0.96, "qwq-32b": 0.984, "Average": 0.6098666666666668 }, "MineralClassificationEnv": { "qwen2.5-3b-instruct": 0.11600000000000002, "qwen2.5-7b-instruct": 0.248, "qwen2.5-14b-instruct": 0.8320000000000001, "qwen2.5-32b-instruct": 0.9040000000000001, "qwen2.5-72b-instruct": 0.884, "llama-3.1-8b-instruct": 0.384, "llama-3.1-70b-instruct": 0.8240000000000001, "llama-3.2-3b-instruct": 0.14800000000000002, "llama-3.3-70b-instruct": 0.8960000000000001, "mistral-large-instruct-2411": 0.908, "gemma-2-27b-it": 0.508, "gemma-2-9b-it": 0.268, "deepseek-v3": 0.984, "deepseek-r1": 0.9199999999999999, "qwq-32b": 0.9640000000000001, "Average": 0.6525333333333333 }, "EconomicEnv": { "qwen2.5-3b-instruct": 0.136, "qwen2.5-7b-instruct": 0.24, "qwen2.5-14b-instruct": 0.8560000000000001, "qwen2.5-32b-instruct": 0.9199999999999999, "qwen2.5-72b-instruct": 0.8960000000000001, "llama-3.1-8b-instruct": 0.43600000000000005, "llama-3.1-70b-instruct": 0.808, "llama-3.2-3b-instruct": 0.152, "llama-3.3-70b-instruct": 0.8240000000000001, "mistral-large-instruct-2411": 0.924, "gemma-2-27b-it": 0.45199999999999996, "gemma-2-9b-it": 0.36, "deepseek-v3": 0.9559999999999998, "deepseek-r1": 0.9359999999999999, "qwq-32b": 0.9719999999999999, "Average": 0.6578666666666667 }, "DetectiveEnv": { "qwen2.5-3b-instruct": 0.168, "qwen2.5-7b-instruct": 0.38, "qwen2.5-14b-instruct": 0.836, "qwen2.5-32b-instruct": 0.884, "qwen2.5-72b-instruct": 0.8480000000000001, "llama-3.1-8b-instruct": 0.34800000000000003, "llama-3.1-70b-instruct": 0.74, "llama-3.2-3b-instruct": 0.248, "llama-3.3-70b-instruct": 0.792, "mistral-large-instruct-2411": 0.8960000000000001, "gemma-2-27b-it": 0.512, "gemma-2-9b-it": 0.33199999999999996, "deepseek-v3": 0.976, "deepseek-r1": 0.9640000000000001, "qwq-32b": 0.984, "Average": 0.6605333333333333 }, "ChessEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.27999999999999997, "qwen2.5-14b-instruct": 0.592, "qwen2.5-32b-instruct": 0.616, "qwen2.5-72b-instruct": 0.5720000000000001, "llama-3.1-8b-instruct": 0.188, "llama-3.1-70b-instruct": 0.6639999999999999, "llama-3.2-3b-instruct": 0.084, "llama-3.3-70b-instruct": 0.6280000000000001, "mistral-large-instruct-2411": 0.744, "gemma-2-27b-it": 0.30000000000000004, "gemma-2-9b-it": 0.096, "deepseek-v3": 0.696, "deepseek-r1": 0.6519999999999999, "qwq-32b": 0.664, "Average": 0.4639999999999999 }, "MythicalEnv": { "qwen2.5-3b-instruct": 0.2, "qwen2.5-7b-instruct": 0.336, "qwen2.5-14b-instruct": 0.8039999999999999, "qwen2.5-32b-instruct": 0.712, "qwen2.5-72b-instruct": 0.632, "llama-3.1-8b-instruct": 0.356, "llama-3.1-70b-instruct": 0.54, "llama-3.2-3b-instruct": 0.16, "llama-3.3-70b-instruct": 0.556, "mistral-large-instruct-2411": 0.728, "gemma-2-27b-it": 0.54, "gemma-2-9b-it": 0.404, "deepseek-v3": 0.9279999999999999, "deepseek-r1": 0.8959999999999999, "qwq-32b": 0.876, "Average": 0.5778666666666666 }, "ChemicalCompoundsEnv": { "qwen2.5-3b-instruct": 0.18, "qwen2.5-7b-instruct": 0.252, "qwen2.5-14b-instruct": 0.40800000000000003, "qwen2.5-32b-instruct": 0.30000000000000004, "qwen2.5-72b-instruct": 0.28400000000000003, "llama-3.1-8b-instruct": 0.148, "llama-3.1-70b-instruct": 0.28, "llama-3.2-3b-instruct": 0.14, "llama-3.3-70b-instruct": 0.18000000000000002, "mistral-large-instruct-2411": 0.43200000000000005, "gemma-2-27b-it": 0.23200000000000004, "gemma-2-9b-it": 0.13599999999999998, "deepseek-v3": 0.46799999999999997, "deepseek-r1": 0.624, "qwq-32b": 0.752, "Average": 0.32106666666666667 }, "ArchitecturalEnv": { "qwen2.5-3b-instruct": 0.20400000000000001, "qwen2.5-7b-instruct": 0.316, "qwen2.5-14b-instruct": 0.72, "qwen2.5-32b-instruct": 0.66, "qwen2.5-72b-instruct": 0.7120000000000001, "llama-3.1-8b-instruct": 0.256, "llama-3.1-70b-instruct": 0.556, "llama-3.2-3b-instruct": 0.132, "llama-3.3-70b-instruct": 0.508, "mistral-large-instruct-2411": 0.724, "gemma-2-27b-it": 0.488, "gemma-2-9b-it": 0.236, "deepseek-v3": 0.82, "deepseek-r1": 0.744, "qwq-32b": 0.8240000000000001, "Average": 0.5266666666666666 }, "ComputationEnv": { "qwen2.5-3b-instruct": 0.152, "qwen2.5-7b-instruct": 0.248, "qwen2.5-14b-instruct": 0.76, "qwen2.5-32b-instruct": 0.884, "qwen2.5-72b-instruct": 0.8560000000000001, "llama-3.1-8b-instruct": 0.32799999999999996, "llama-3.1-70b-instruct": 0.788, "llama-3.2-3b-instruct": 0.13999999999999999, "llama-3.3-70b-instruct": 0.8560000000000001, "mistral-large-instruct-2411": 0.828, "gemma-2-27b-it": 0.45199999999999996, "gemma-2-9b-it": 0.252, "deepseek-v3": 0.96, "deepseek-r1": 0.9399999999999998, "qwq-32b": 0.908, "Average": 0.6234666666666667 }, "MachinePartEnv": { "qwen2.5-3b-instruct": 0.14, "qwen2.5-7b-instruct": 0.32, "qwen2.5-14b-instruct": 0.8240000000000001, "qwen2.5-32b-instruct": 0.8800000000000001, "qwen2.5-72b-instruct": 0.828, "llama-3.1-8b-instruct": 0.376, "llama-3.1-70b-instruct": 0.8200000000000001, "llama-3.2-3b-instruct": 0.168, "llama-3.3-70b-instruct": 0.8960000000000001, "mistral-large-instruct-2411": 0.876, "gemma-2-27b-it": 0.508, "gemma-2-9b-it": 0.268, "deepseek-v3": 0.9719999999999999, "deepseek-r1": 0.952, "qwq-32b": 0.916, "Average": 0.6496 }, "LiteraryEnv": { "qwen2.5-3b-instruct": 0.10400000000000001, "qwen2.5-7b-instruct": 0.328, "qwen2.5-14b-instruct": 0.8800000000000001, "qwen2.5-32b-instruct": 0.9279999999999999, "qwen2.5-72b-instruct": 0.9, "llama-3.1-8b-instruct": 0.336, "llama-3.1-70b-instruct": 0.664, "llama-3.2-3b-instruct": 0.13999999999999999, "llama-3.3-70b-instruct": 0.664, "mistral-large-instruct-2411": 0.884, "gemma-2-27b-it": 0.44399999999999995, "gemma-2-9b-it": 0.13999999999999999, "deepseek-v3": 0.984, "deepseek-r1": 0.9119999999999999, "qwq-32b": 0.968, "Average": 0.6184 }, "MarineEnv": { "qwen2.5-3b-instruct": 0.144, "qwen2.5-7b-instruct": 0.384, "qwen2.5-14b-instruct": 0.8720000000000001, "qwen2.5-32b-instruct": 0.844, "qwen2.5-72b-instruct": 0.8320000000000001, "llama-3.1-8b-instruct": 0.308, "llama-3.1-70b-instruct": 0.636, "llama-3.2-3b-instruct": 0.12000000000000002, "llama-3.3-70b-instruct": 0.704, "mistral-large-instruct-2411": 0.7879999999999999, "gemma-2-27b-it": 0.484, "gemma-2-9b-it": 0.23199999999999998, "deepseek-v3": 0.884, "deepseek-r1": 0.9, "qwq-32b": 0.8880000000000001, "Average": 0.6013333333333334 }, "PhilosophyEnv": { "qwen2.5-3b-instruct": 0.144, "qwen2.5-7b-instruct": 0.3, "qwen2.5-14b-instruct": 0.7280000000000001, "qwen2.5-32b-instruct": 0.82, "qwen2.5-72b-instruct": 0.8719999999999999, "llama-3.1-8b-instruct": 0.32799999999999996, "llama-3.1-70b-instruct": 0.764, "llama-3.2-3b-instruct": 0.036000000000000004, "llama-3.3-70b-instruct": 0.796, "mistral-large-instruct-2411": 0.7879999999999999, "gemma-2-27b-it": 0.372, "gemma-2-9b-it": 0.28, "deepseek-v3": 0.844, "deepseek-r1": 0.78, "qwq-32b": 0.8320000000000001, "Average": 0.5789333333333334 }, "ArchaeologicalEnv": { "qwen2.5-3b-instruct": 0.18, "qwen2.5-7b-instruct": 0.38, "qwen2.5-14b-instruct": 0.58, "qwen2.5-32b-instruct": 0.608, "qwen2.5-72b-instruct": 0.5640000000000001, "llama-3.1-8b-instruct": 0.26, "llama-3.1-70b-instruct": 0.608, "llama-3.2-3b-instruct": 0.192, "llama-3.3-70b-instruct": 0.548, "mistral-large-instruct-2411": 0.64, "gemma-2-27b-it": 0.476, "gemma-2-9b-it": 0.30000000000000004, "deepseek-v3": 0.916, "deepseek-r1": 0.7040000000000001, "qwq-32b": 0.7559999999999999, "Average": 0.5141333333333333 }, "GemstoneEnv": { "qwen2.5-3b-instruct": 0.192, "qwen2.5-7b-instruct": 0.264, "qwen2.5-14b-instruct": 0.492, "qwen2.5-32b-instruct": 0.45599999999999996, "qwen2.5-72b-instruct": 0.44000000000000006, "llama-3.1-8b-instruct": 0.192, "llama-3.1-70b-instruct": 0.40800000000000003, "llama-3.2-3b-instruct": 0.15200000000000002, "llama-3.3-70b-instruct": 0.45599999999999996, "mistral-large-instruct-2411": 0.528, "gemma-2-27b-it": 0.33999999999999997, "gemma-2-9b-it": 0.256, "deepseek-v3": 0.5680000000000001, "deepseek-r1": 0.5680000000000001, "qwq-32b": 0.636, "Average": 0.3965333333333333 }, "MicrobiologyEnv": { "qwen2.5-3b-instruct": 0.14400000000000002, "qwen2.5-7b-instruct": 0.38400000000000006, "qwen2.5-14b-instruct": 0.752, "qwen2.5-32b-instruct": 0.7, "qwen2.5-72b-instruct": 0.844, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.512, "llama-3.2-3b-instruct": 0.12000000000000002, "llama-3.3-70b-instruct": 0.496, "mistral-large-instruct-2411": 0.764, "gemma-2-27b-it": 0.504, "gemma-2-9b-it": 0.172, "deepseek-v3": 0.9279999999999999, "deepseek-r1": 0.952, "qwq-32b": 0.932, "Average": 0.568 }, "SciFiEnv": { "qwen2.5-3b-instruct": 0.192, "qwen2.5-7b-instruct": 0.384, "qwen2.5-14b-instruct": 0.7879999999999999, "qwen2.5-32b-instruct": 0.776, "qwen2.5-72b-instruct": 0.7879999999999999, "llama-3.1-8b-instruct": 0.35200000000000004, "llama-3.1-70b-instruct": 0.664, "llama-3.2-3b-instruct": 0.164, "llama-3.3-70b-instruct": 0.588, "mistral-large-instruct-2411": 0.736, "gemma-2-27b-it": 0.52, "gemma-2-9b-it": 0.33599999999999997, "deepseek-v3": 0.9279999999999999, "deepseek-r1": 0.9199999999999999, "qwq-32b": 0.9, "Average": 0.6023999999999999 }, "HormoneEnv": { "qwen2.5-3b-instruct": 0.152, "qwen2.5-7b-instruct": 0.40800000000000003, "qwen2.5-14b-instruct": 0.7999999999999999, "qwen2.5-32b-instruct": 0.784, "qwen2.5-72b-instruct": 0.764, "llama-3.1-8b-instruct": 0.336, "llama-3.1-70b-instruct": 0.76, "llama-3.2-3b-instruct": 0.184, "llama-3.3-70b-instruct": 0.8480000000000001, "mistral-large-instruct-2411": 0.8, "gemma-2-27b-it": 0.524, "gemma-2-9b-it": 0.312, "deepseek-v3": 0.9480000000000001, "deepseek-r1": 0.944, "qwq-32b": 0.852, "Average": 0.6277333333333334 }, "SculptorEnv": { "qwen2.5-3b-instruct": 0.23200000000000004, "qwen2.5-7b-instruct": 0.4159999999999999, "qwen2.5-14b-instruct": 0.7079999999999999, "qwen2.5-32b-instruct": 0.636, "qwen2.5-72b-instruct": 0.6, "llama-3.1-8b-instruct": 0.22799999999999998, "llama-3.1-70b-instruct": 0.484, "llama-3.2-3b-instruct": 0.188, "llama-3.3-70b-instruct": 0.532, "mistral-large-instruct-2411": 0.684, "gemma-2-27b-it": 0.30000000000000004, "gemma-2-9b-it": 0.156, "deepseek-v3": 0.788, "deepseek-r1": 0.7479999999999999, "qwq-32b": 0.8119999999999999, "Average": 0.5008 }, "NeuroEnv": { "qwen2.5-3b-instruct": 0.10800000000000001, "qwen2.5-7b-instruct": 0.24400000000000005, "qwen2.5-14b-instruct": 0.8960000000000001, "qwen2.5-32b-instruct": 0.892, "qwen2.5-72b-instruct": 0.8879999999999999, "llama-3.1-8b-instruct": 0.512, "llama-3.1-70b-instruct": 0.8880000000000001, "llama-3.2-3b-instruct": 0.20400000000000001, "llama-3.3-70b-instruct": 0.9279999999999999, "mistral-large-instruct-2411": 0.8880000000000001, "gemma-2-27b-it": 0.72, "gemma-2-9b-it": 0.42800000000000005, "deepseek-v3": 0.952, "deepseek-r1": 0.932, "qwq-32b": 0.852, "Average": 0.6888000000000001 }, "OceanEnv": { "qwen2.5-3b-instruct": 0.2, "qwen2.5-7b-instruct": 0.45999999999999996, "qwen2.5-14b-instruct": 0.6160000000000001, "qwen2.5-32b-instruct": 0.6000000000000001, "qwen2.5-72b-instruct": 0.62, "llama-3.1-8b-instruct": 0.36400000000000005, "llama-3.1-70b-instruct": 0.5680000000000001, "llama-3.2-3b-instruct": 0.156, "llama-3.3-70b-instruct": 0.476, "mistral-large-instruct-2411": 0.656, "gemma-2-27b-it": 0.43200000000000005, "gemma-2-9b-it": 0.248, "deepseek-v3": 0.852, "deepseek-r1": 0.836, "qwq-32b": 0.8240000000000001, "Average": 0.5272000000000001 }, "MineralEnv": { "qwen2.5-3b-instruct": 0.14400000000000002, "qwen2.5-7b-instruct": 0.38, "qwen2.5-14b-instruct": 0.768, "qwen2.5-32b-instruct": 0.6960000000000001, "qwen2.5-72b-instruct": 0.684, "llama-3.1-8b-instruct": 0.29600000000000004, "llama-3.1-70b-instruct": 0.556, "llama-3.2-3b-instruct": 0.16, "llama-3.3-70b-instruct": 0.56, "mistral-large-instruct-2411": 0.66, "gemma-2-27b-it": 0.384, "gemma-2-9b-it": 0.17200000000000001, "deepseek-v3": 0.8480000000000001, "deepseek-r1": 0.82, "qwq-32b": 0.8720000000000001, "Average": 0.5333333333333333 }, "FishEnv": { "qwen2.5-3b-instruct": 0.188, "qwen2.5-7b-instruct": 0.38, "qwen2.5-14b-instruct": 0.732, "qwen2.5-32b-instruct": 0.668, "qwen2.5-72b-instruct": 0.7200000000000001, "llama-3.1-8b-instruct": 0.392, "llama-3.1-70b-instruct": 0.624, "llama-3.2-3b-instruct": 0.13599999999999998, "llama-3.3-70b-instruct": 0.616, "mistral-large-instruct-2411": 0.736, "gemma-2-27b-it": 0.508, "gemma-2-9b-it": 0.268, "deepseek-v3": 0.86, "deepseek-r1": 0.868, "qwq-32b": 0.924, "Average": 0.5746666666666667 }, "MartialArtsEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.43200000000000005, "qwen2.5-14b-instruct": 0.672, "qwen2.5-32b-instruct": 0.5640000000000001, "qwen2.5-72b-instruct": 0.56, "llama-3.1-8b-instruct": 0.276, "llama-3.1-70b-instruct": 0.54, "llama-3.2-3b-instruct": 0.2, "llama-3.3-70b-instruct": 0.52, "mistral-large-instruct-2411": 0.568, "gemma-2-27b-it": 0.4, "gemma-2-9b-it": 0.22400000000000003, "deepseek-v3": 0.784, "deepseek-r1": 0.716, "qwq-32b": 0.752, "Average": 0.4928 }, "RocketFuelEnv": { "qwen2.5-3b-instruct": 0.22800000000000004, "qwen2.5-7b-instruct": 0.41600000000000004, "qwen2.5-14b-instruct": 0.852, "qwen2.5-32b-instruct": 0.7879999999999999, "qwen2.5-72b-instruct": 0.8160000000000001, "llama-3.1-8b-instruct": 0.36, "llama-3.1-70b-instruct": 0.6799999999999999, "llama-3.2-3b-instruct": 0.184, "llama-3.3-70b-instruct": 0.7239999999999999, "mistral-large-instruct-2411": 0.828, "gemma-2-27b-it": 0.6279999999999999, "gemma-2-9b-it": 0.248, "deepseek-v3": 0.916, "deepseek-r1": 0.8960000000000001, "qwq-32b": 0.9040000000000001, "Average": 0.6312000000000001 }, "MLEnv": { "qwen2.5-3b-instruct": 0.088, "qwen2.5-7b-instruct": 0.392, "qwen2.5-14b-instruct": 0.6, "qwen2.5-32b-instruct": 0.748, "qwen2.5-72b-instruct": 0.792, "llama-3.1-8b-instruct": 0.304, "llama-3.1-70b-instruct": 0.672, "llama-3.2-3b-instruct": 0.10799999999999998, "llama-3.3-70b-instruct": 0.5960000000000001, "mistral-large-instruct-2411": 0.7639999999999999, "gemma-2-27b-it": 0.264, "gemma-2-9b-it": 0.156, "deepseek-v3": 0.808, "deepseek-r1": 0.652, "qwq-32b": 0.772, "Average": 0.5144 }, "PoliticalManifestoEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.312, "qwen2.5-14b-instruct": 0.76, "qwen2.5-32b-instruct": 0.852, "qwen2.5-72b-instruct": 0.7839999999999999, "llama-3.1-8b-instruct": 0.42400000000000004, "llama-3.1-70b-instruct": 0.62, "llama-3.2-3b-instruct": 0.128, "llama-3.3-70b-instruct": 0.692, "mistral-large-instruct-2411": 0.796, "gemma-2-27b-it": 0.45200000000000007, "gemma-2-9b-it": 0.152, "deepseek-v3": 0.86, "deepseek-r1": 0.792, "qwq-32b": 0.8800000000000001, "Average": 0.5792 }, "CoffeeEnv": { "qwen2.5-3b-instruct": 0.20400000000000001, "qwen2.5-7b-instruct": 0.38, "qwen2.5-14b-instruct": 0.7799999999999999, "qwen2.5-32b-instruct": 0.8039999999999999, "qwen2.5-72b-instruct": 0.764, "llama-3.1-8b-instruct": 0.31599999999999995, "llama-3.1-70b-instruct": 0.552, "llama-3.2-3b-instruct": 0.17200000000000001, "llama-3.3-70b-instruct": 0.6599999999999999, "mistral-large-instruct-2411": 0.828, "gemma-2-27b-it": 0.592, "gemma-2-9b-it": 0.364, "deepseek-v3": 0.9120000000000001, "deepseek-r1": 0.9279999999999999, "qwq-32b": 0.9359999999999999, "Average": 0.6128 }, "MotifAnalysisEnv": { "qwen2.5-3b-instruct": 0.096, "qwen2.5-7b-instruct": 0.332, "qwen2.5-14b-instruct": 0.5680000000000001, "qwen2.5-32b-instruct": 0.496, "qwen2.5-72b-instruct": 0.5920000000000001, "llama-3.1-8b-instruct": 0.244, "llama-3.1-70b-instruct": 0.36000000000000004, "llama-3.2-3b-instruct": 0.13999999999999999, "llama-3.3-70b-instruct": 0.22400000000000003, "mistral-large-instruct-2411": 0.46399999999999997, "gemma-2-27b-it": 0.18, "gemma-2-9b-it": 0.128, "deepseek-v3": 0.752, "deepseek-r1": 0.8240000000000001, "qwq-32b": 0.8640000000000001, "Average": 0.4176 }, "NutritionEnv": { "qwen2.5-3b-instruct": 0.132, "qwen2.5-7b-instruct": 0.22000000000000003, "qwen2.5-14b-instruct": 0.7920000000000001, "qwen2.5-32b-instruct": 0.8400000000000001, "qwen2.5-72b-instruct": 0.876, "llama-3.1-8b-instruct": 0.264, "llama-3.1-70b-instruct": 0.64, "llama-3.2-3b-instruct": 0.128, "llama-3.3-70b-instruct": 0.7040000000000001, "mistral-large-instruct-2411": 0.8320000000000001, "gemma-2-27b-it": 0.38, "gemma-2-9b-it": 0.20800000000000002, "deepseek-v3": 0.944, "deepseek-r1": 0.944, "qwq-32b": 0.9120000000000001, "Average": 0.5877333333333333 }, "MalwareEnv": { "qwen2.5-3b-instruct": 0.16, "qwen2.5-7b-instruct": 0.316, "qwen2.5-14b-instruct": 0.728, "qwen2.5-32b-instruct": 0.756, "qwen2.5-72b-instruct": 0.7200000000000001, "llama-3.1-8b-instruct": 0.268, "llama-3.1-70b-instruct": 0.5840000000000001, "llama-3.2-3b-instruct": 0.10800000000000001, "llama-3.3-70b-instruct": 0.548, "mistral-large-instruct-2411": 0.752, "gemma-2-27b-it": 0.252, "gemma-2-9b-it": 0.12, "deepseek-v3": 0.916, "deepseek-r1": 0.9, "qwq-32b": 0.916, "Average": 0.5362666666666667 }, "GeologicalEnv": { "qwen2.5-3b-instruct": 0.132, "qwen2.5-7b-instruct": 0.336, "qwen2.5-14b-instruct": 0.7639999999999999, "qwen2.5-32b-instruct": 0.748, "qwen2.5-72b-instruct": 0.676, "llama-3.1-8b-instruct": 0.28800000000000003, "llama-3.1-70b-instruct": 0.552, "llama-3.2-3b-instruct": 0.13999999999999999, "llama-3.3-70b-instruct": 0.508, "mistral-large-instruct-2411": 0.812, "gemma-2-27b-it": 0.41600000000000004, "gemma-2-9b-it": 0.164, "deepseek-v3": 0.9119999999999999, "deepseek-r1": 0.8480000000000001, "qwq-32b": 0.8880000000000001, "Average": 0.5456000000000001 }, "TheatricalEnv": { "qwen2.5-3b-instruct": 0.14400000000000002, "qwen2.5-7b-instruct": 0.42400000000000004, "qwen2.5-14b-instruct": 0.676, "qwen2.5-32b-instruct": 0.78, "qwen2.5-72b-instruct": 0.808, "llama-3.1-8b-instruct": 0.41200000000000003, "llama-3.1-70b-instruct": 0.7959999999999999, "llama-3.2-3b-instruct": 0.1, "llama-3.3-70b-instruct": 0.768, "mistral-large-instruct-2411": 0.844, "gemma-2-27b-it": 0.528, "gemma-2-9b-it": 0.28, "deepseek-v3": 0.884, "deepseek-r1": 0.8240000000000001, "qwq-32b": 0.908, "Average": 0.6117333333333335 }, "PrintingTechniqueEnv": { "qwen2.5-3b-instruct": 0.144, "qwen2.5-7b-instruct": 0.252, "qwen2.5-14b-instruct": 0.736, "qwen2.5-32b-instruct": 0.7200000000000001, "qwen2.5-72b-instruct": 0.776, "llama-3.1-8b-instruct": 0.4, "llama-3.1-70b-instruct": 0.54, "llama-3.2-3b-instruct": 0.16, "llama-3.3-70b-instruct": 0.548, "mistral-large-instruct-2411": 0.7040000000000001, "gemma-2-27b-it": 0.44000000000000006, "gemma-2-9b-it": 0.192, "deepseek-v3": 0.916, "deepseek-r1": 0.852, "qwq-32b": 0.9279999999999999, "Average": 0.5538666666666666 }, "StellarEnv": { "qwen2.5-3b-instruct": 0.132, "qwen2.5-7b-instruct": 0.388, "qwen2.5-14b-instruct": 0.6759999999999999, "qwen2.5-32b-instruct": 0.724, "qwen2.5-72b-instruct": 0.6960000000000001, "llama-3.1-8b-instruct": 0.30000000000000004, "llama-3.1-70b-instruct": 0.6040000000000001, "llama-3.2-3b-instruct": 0.16, "llama-3.3-70b-instruct": 0.6240000000000001, "mistral-large-instruct-2411": 0.732, "gemma-2-27b-it": 0.364, "gemma-2-9b-it": 0.23199999999999998, "deepseek-v3": 0.82, "deepseek-r1": 0.648, "qwq-32b": 0.776, "Average": 0.5250666666666667 }, "SoilEnv": { "qwen2.5-3b-instruct": 0.172, "qwen2.5-7b-instruct": 0.48, "qwen2.5-14b-instruct": 0.8320000000000001, "qwen2.5-32b-instruct": 0.788, "qwen2.5-72b-instruct": 0.8240000000000001, "llama-3.1-8b-instruct": 0.42400000000000004, "llama-3.1-70b-instruct": 0.64, "llama-3.2-3b-instruct": 0.22799999999999998, "llama-3.3-70b-instruct": 0.664, "mistral-large-instruct-2411": 0.76, "gemma-2-27b-it": 0.628, "gemma-2-9b-it": 0.44000000000000006, "deepseek-v3": 0.884, "deepseek-r1": 0.8039999999999999, "qwq-32b": 0.8480000000000001, "Average": 0.6277333333333334 }, "SoftwareEnv": { "qwen2.5-3b-instruct": 0.14800000000000002, "qwen2.5-7b-instruct": 0.40800000000000003, "qwen2.5-14b-instruct": 0.744, "qwen2.5-32b-instruct": 0.86, "qwen2.5-72b-instruct": 0.8400000000000001, "llama-3.1-8b-instruct": 0.4159999999999999, "llama-3.1-70b-instruct": 0.72, "llama-3.2-3b-instruct": 0.16799999999999998, "llama-3.3-70b-instruct": 0.784, "mistral-large-instruct-2411": 0.804, "gemma-2-27b-it": 0.528, "gemma-2-9b-it": 0.308, "deepseek-v3": 0.836, "deepseek-r1": 0.8360000000000001, "qwq-32b": 0.8800000000000001, "Average": 0.6186666666666667 }, "CarIdentificationEnv": { "qwen2.5-3b-instruct": 0.272, "qwen2.5-7b-instruct": 0.4, "qwen2.5-14b-instruct": 0.9120000000000001, "qwen2.5-32b-instruct": 0.916, "qwen2.5-72b-instruct": 0.9359999999999999, "llama-3.1-8b-instruct": 0.544, "llama-3.1-70b-instruct": 0.8400000000000001, "llama-3.2-3b-instruct": 0.124, "llama-3.3-70b-instruct": 0.852, "mistral-large-instruct-2411": 0.9119999999999999, "gemma-2-27b-it": 0.672, "gemma-2-9b-it": 0.376, "deepseek-v3": 0.992, "deepseek-r1": 0.952, "qwq-32b": 0.9879999999999999, "Average": 0.7125333333333334 }, "PharmaceuticalEnv": { "qwen2.5-3b-instruct": 0.156, "qwen2.5-7b-instruct": 0.32, "qwen2.5-14b-instruct": 0.7600000000000001, "qwen2.5-32b-instruct": 0.752, "qwen2.5-72b-instruct": 0.7559999999999999, "llama-3.1-8b-instruct": 0.28400000000000003, "llama-3.1-70b-instruct": 0.508, "llama-3.2-3b-instruct": 0.148, "llama-3.3-70b-instruct": 0.472, "mistral-large-instruct-2411": 0.756, "gemma-2-27b-it": 0.336, "gemma-2-9b-it": 0.128, "deepseek-v3": 0.8800000000000001, "deepseek-r1": 0.8640000000000001, "qwq-32b": 0.8, "Average": 0.528 }, "NetworkEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.36, "qwen2.5-14b-instruct": 0.66, "qwen2.5-32b-instruct": 0.716, "qwen2.5-72b-instruct": 0.716, "llama-3.1-8b-instruct": 0.43199999999999994, "llama-3.1-70b-instruct": 0.68, "llama-3.2-3b-instruct": 0.14400000000000002, "llama-3.3-70b-instruct": 0.7040000000000001, "mistral-large-instruct-2411": 0.78, "gemma-2-27b-it": 0.492, "gemma-2-9b-it": 0.392, "deepseek-v3": 0.8400000000000001, "deepseek-r1": 0.736, "qwq-32b": 0.828, "Average": 0.5776 }, "BirdNestEnv": { "qwen2.5-3b-instruct": 0.148, "qwen2.5-7b-instruct": 0.21200000000000002, "qwen2.5-14b-instruct": 0.48, "qwen2.5-32b-instruct": 0.33999999999999997, "qwen2.5-72b-instruct": 0.42400000000000004, "llama-3.1-8b-instruct": 0.16799999999999998, "llama-3.1-70b-instruct": 0.22400000000000003, "llama-3.2-3b-instruct": 0.084, "llama-3.3-70b-instruct": 0.20800000000000002, "mistral-large-instruct-2411": 0.492, "gemma-2-27b-it": 0.176, "gemma-2-9b-it": 0.128, "deepseek-v3": 0.764, "deepseek-r1": 0.756, "qwq-32b": 0.8119999999999999, "Average": 0.36106666666666676 }, "EnergyEnv": { "qwen2.5-3b-instruct": 0.15999999999999998, "qwen2.5-7b-instruct": 0.42000000000000004, "qwen2.5-14b-instruct": 0.7999999999999999, "qwen2.5-32b-instruct": 0.7, "qwen2.5-72b-instruct": 0.5880000000000001, "llama-3.1-8b-instruct": 0.29600000000000004, "llama-3.1-70b-instruct": 0.46799999999999997, "llama-3.2-3b-instruct": 0.18, "llama-3.3-70b-instruct": 0.396, "mistral-large-instruct-2411": 0.78, "gemma-2-27b-it": 0.35200000000000004, "gemma-2-9b-it": 0.196, "deepseek-v3": 0.916, "deepseek-r1": 0.8720000000000001, "qwq-32b": 0.8880000000000001, "Average": 0.5341333333333333 }, "LanguageEnv": { "qwen2.5-3b-instruct": 0.196, "qwen2.5-7b-instruct": 0.304, "qwen2.5-14b-instruct": 0.388, "qwen2.5-32b-instruct": 0.512, "qwen2.5-72b-instruct": 0.5599999999999999, "llama-3.1-8b-instruct": 0.23200000000000004, "llama-3.1-70b-instruct": 0.40800000000000003, "llama-3.2-3b-instruct": 0.144, "llama-3.3-70b-instruct": 0.336, "mistral-large-instruct-2411": 0.536, "gemma-2-27b-it": 0.20800000000000002, "gemma-2-9b-it": 0.172, "deepseek-v3": 0.724, "deepseek-r1": 0.716, "qwq-32b": 0.8119999999999999, "Average": 0.41653333333333337 }, "AlgorithmEnv": { "qwen2.5-3b-instruct": 0.1, "qwen2.5-7b-instruct": 0.28400000000000003, "qwen2.5-14b-instruct": 0.688, "qwen2.5-32b-instruct": 0.6960000000000001, "qwen2.5-72b-instruct": 0.66, "llama-3.1-8b-instruct": 0.35200000000000004, "llama-3.1-70b-instruct": 0.512, "llama-3.2-3b-instruct": 0.22399999999999998, "llama-3.3-70b-instruct": 0.484, "mistral-large-instruct-2411": 0.788, "gemma-2-27b-it": 0.268, "gemma-2-9b-it": 0.164, "deepseek-v3": 0.792, "deepseek-r1": 0.724, "qwq-32b": 0.812, "Average": 0.5032 }, "MathematicalEnv": { "qwen2.5-3b-instruct": 0.048, "qwen2.5-7b-instruct": 0.42800000000000005, "qwen2.5-14b-instruct": 0.7000000000000001, "qwen2.5-32b-instruct": 0.8119999999999999, "qwen2.5-72b-instruct": 0.792, "llama-3.1-8b-instruct": 0.316, "llama-3.1-70b-instruct": 0.8, "llama-3.2-3b-instruct": 0.12800000000000003, "llama-3.3-70b-instruct": 0.8400000000000001, "mistral-large-instruct-2411": 0.884, "gemma-2-27b-it": 0.268, "gemma-2-9b-it": 0.068, "deepseek-v3": 0.9119999999999999, "deepseek-r1": 0.876, "qwq-32b": 0.8160000000000001, "Average": 0.5792 }, "MusicalEnv": { "qwen2.5-3b-instruct": 0.04, "qwen2.5-7b-instruct": 0.336, "qwen2.5-14b-instruct": 0.8039999999999999, "qwen2.5-32b-instruct": 0.8560000000000001, "qwen2.5-72b-instruct": 0.8400000000000001, "llama-3.1-8b-instruct": 0.34400000000000003, "llama-3.1-70b-instruct": 0.68, "llama-3.2-3b-instruct": 0.088, "llama-3.3-70b-instruct": 0.8240000000000001, "mistral-large-instruct-2411": 0.884, "gemma-2-27b-it": 0.28, "gemma-2-9b-it": 0.11599999999999999, "deepseek-v3": 0.9480000000000001, "deepseek-r1": 0.892, "qwq-32b": 0.9039999999999999, "Average": 0.5890666666666668 }, "InventorEnv": { "qwen2.5-3b-instruct": 0.14800000000000002, "qwen2.5-7b-instruct": 0.43200000000000005, "qwen2.5-14b-instruct": 0.776, "qwen2.5-32b-instruct": 0.7999999999999999, "qwen2.5-72b-instruct": 0.772, "llama-3.1-8b-instruct": 0.4, "llama-3.1-70b-instruct": 0.7, "llama-3.2-3b-instruct": 0.188, "llama-3.3-70b-instruct": 0.616, "mistral-large-instruct-2411": 0.8039999999999999, "gemma-2-27b-it": 0.552, "gemma-2-9b-it": 0.364, "deepseek-v3": 0.9399999999999998, "deepseek-r1": 0.908, "qwq-32b": 0.9, "Average": 0.62 }, "MedicalEnv": { "qwen2.5-3b-instruct": 0.22000000000000003, "qwen2.5-7b-instruct": 0.544, "qwen2.5-14b-instruct": 0.8320000000000001, "qwen2.5-32b-instruct": 0.8800000000000001, "qwen2.5-72b-instruct": 0.8960000000000001, "llama-3.1-8b-instruct": 0.52, "llama-3.1-70b-instruct": 0.82, "llama-3.2-3b-instruct": 0.23200000000000004, "llama-3.3-70b-instruct": 0.8960000000000001, "mistral-large-instruct-2411": 0.8960000000000001, "gemma-2-27b-it": 0.692, "gemma-2-9b-it": 0.5760000000000001, "deepseek-v3": 0.9039999999999999, "deepseek-r1": 0.9359999999999999, "qwq-32b": 0.9199999999999999, "Average": 0.7175999999999999 }, "MusicEnv": { "qwen2.5-3b-instruct": 0.184, "qwen2.5-7b-instruct": 0.26, "qwen2.5-14b-instruct": 0.656, "qwen2.5-32b-instruct": 0.664, "qwen2.5-72b-instruct": 0.7559999999999999, "llama-3.1-8b-instruct": 0.356, "llama-3.1-70b-instruct": 0.596, "llama-3.2-3b-instruct": 0.10800000000000001, "llama-3.3-70b-instruct": 0.596, "mistral-large-instruct-2411": 0.6639999999999999, "gemma-2-27b-it": 0.45600000000000007, "gemma-2-9b-it": 0.28400000000000003, "deepseek-v3": 0.8119999999999999, "deepseek-r1": 0.868, "qwq-32b": 0.868, "Average": 0.5418666666666667 }, "FantasyEnv": { "qwen2.5-3b-instruct": 0.148, "qwen2.5-7b-instruct": 0.32, "qwen2.5-14b-instruct": 0.74, "qwen2.5-32b-instruct": 0.7879999999999999, "qwen2.5-72b-instruct": 0.5720000000000001, "llama-3.1-8b-instruct": 0.40800000000000003, "llama-3.1-70b-instruct": 0.676, "llama-3.2-3b-instruct": 0.152, "llama-3.3-70b-instruct": 0.704, "mistral-large-instruct-2411": 0.8240000000000001, "gemma-2-27b-it": 0.524, "gemma-2-9b-it": 0.324, "deepseek-v3": 0.9199999999999999, "deepseek-r1": 0.9719999999999999, "qwq-32b": 0.9719999999999999, "Average": 0.6029333333333332 }, "EducationEnv": { "qwen2.5-3b-instruct": 0.10400000000000001, "qwen2.5-7b-instruct": 0.268, "qwen2.5-14b-instruct": 0.828, "qwen2.5-32b-instruct": 0.9039999999999999, "qwen2.5-72b-instruct": 0.8480000000000001, "llama-3.1-8b-instruct": 0.5680000000000001, "llama-3.1-70b-instruct": 0.768, "llama-3.2-3b-instruct": 0.192, "llama-3.3-70b-instruct": 0.9039999999999999, "mistral-large-instruct-2411": 0.876, "gemma-2-27b-it": 0.624, "gemma-2-9b-it": 0.45999999999999996, "deepseek-v3": 0.9480000000000001, "deepseek-r1": 0.9, "qwq-32b": 0.9359999999999999, "Average": 0.6752 }, "ChemicalEnv": { "qwen2.5-3b-instruct": 0.264, "qwen2.5-7b-instruct": 0.44000000000000006, "qwen2.5-14b-instruct": 0.724, "qwen2.5-32b-instruct": 0.7040000000000001, "qwen2.5-72b-instruct": 0.72, "llama-3.1-8b-instruct": 0.36, "llama-3.1-70b-instruct": 0.62, "llama-3.2-3b-instruct": 0.16399999999999998, "llama-3.3-70b-instruct": 0.45999999999999996, "mistral-large-instruct-2411": 0.68, "gemma-2-27b-it": 0.44399999999999995, "gemma-2-9b-it": 0.316, "deepseek-v3": 0.8799999999999999, "deepseek-r1": 0.6799999999999999, "qwq-32b": 0.8200000000000001, "Average": 0.5517333333333333 }, "Average": { "qwen2.5-3b-instruct": 0.1655841584158416, "qwen2.5-7b-instruct": 0.34736633663366323, "qwen2.5-14b-instruct": 0.7148514851485149, "qwen2.5-32b-instruct": 0.7330693069306928, "qwen2.5-72b-instruct": 0.7272079207920793, "llama-3.1-8b-instruct": 0.3334653465346535, "llama-3.1-70b-instruct": 0.6271287128712871, "llama-3.2-3b-instruct": 0.15599999999999997, "llama-3.3-70b-instruct": 0.6372277227722771, "mistral-large-instruct-2411": 0.7573861386138615, "gemma-2-27b-it": 0.44522772277227735, "gemma-2-9b-it": 0.264, "deepseek-v3": 0.8605148514851484, "deepseek-r1": 0.8304554455445546, "qwq-32b": 0.8630891089108911 } }