|
{ |
|
"gemini-1.5-pro": { |
|
"mmlu": "2024-11-04T16-56-26-05-00_mmlu_Z9KrcK7x4ZLAR5nJ9JaVUe.json", |
|
"humaneval": "2024-11-04T12-43-07-05-00_humaneval_5JBjtymGtK23qwVKxqidhV.json", |
|
"mmlu_pro": "2024-11-04T20-13-09-05-00_mmlu-pro_Hv2ujvKLV6H7ZwQu2q8LNw.json", |
|
"math": "2024-11-04T15-48-46-05-00_math_9DAZmGEfhpa3nUcmMAwqZe.json", |
|
"arc_easy": "2024-11-04T12-31-43-05-00_arc-easy_eGxYWywpLuREcaCKvHa8Uk.json", |
|
"gsm8k": "2024-11-04T15-15-26-05-00_gsm8k_cTebw3ugfrVz3dyPwxtdUZ.json", |
|
"gpqa_diamond": "2024-11-05T09-56-31-05-00_gpqa-diamond_FBq2bnoyGYQ3NF96xQw8iy.json", |
|
"ifeval": "2024-11-04T12-43-32-05-00_ifeval_mSwZ7AwA7akj5PjZbQMjgC.json", |
|
"winogrande": "2024-11-04T12-40-46-05-00_winogrande_5SmD6rx47zmZvHHkQSSfHK.json", |
|
"arc_challenge": "2024-11-04T12-37-36-05-00_arc-challenge_5VVApyQD22QpJoMm53EMdU.json", |
|
"drop": "2024-11-04T12-44-32-05-00_drop_9dzPKVJojSVsxmiBFnej2m.json", |
|
"hellaswag": "2024-11-05T13-14-31-05-00_hellaswag_N98eeftuY2pucRtgpUYk5m.json", |
|
"gaia": "2024-11-15T12-53-32-05-00_gaia_NvyGRTXFrFskJfUvuLwvVr.json", |
|
"gdm_intercode_ctf": "2024-11-15T16-23-23-05-00_gdm-intercode-ctf_3JrgtTMcijTUxHVaagPRYh.json" |
|
}, |
|
"gemini-1.5-flash": { |
|
"gpqa_diamond": "2024-11-04T12-47-34-05-00_gpqa-diamond_cL5kQj8DWbRfxz79piTSdy.json", |
|
"arc_challenge": "2024-11-04T12-45-59-05-00_arc-challenge_YQLMHfEXqeYgGJY86EB9bp.json", |
|
"math": "2024-11-04T15-25-38-05-00_math_eaYBRMFgo8p6VUUCYxnCWj.json", |
|
"drop": "2024-11-04T12-52-08-05-00_drop_5i253AQzbENgHTYN4ATemV.json", |
|
"mmlu_pro": "2024-11-04T19-44-13-05-00_mmlu-pro_8GrR6wUsYNkthiZNMmLa8y.json", |
|
"ifeval": "2024-11-04T12-51-30-05-00_ifeval_ZATErMbLHoyxh4kDaSqy8j.json", |
|
"hellaswag": "2024-11-05T23-19-25-05-00_hellaswag_MRffohuzgVjighGb8FoqSJ.json", |
|
"winogrande": "2024-11-04T12-48-29-05-00_winogrande_Hmqo6Ydz3nfCnQAdUwgrbD.json", |
|
"humaneval": "2024-11-04T12-50-47-05-00_humaneval_9j4rYguKeKmxEoD9VuddwX.json", |
|
"arc_easy": "2024-11-04T12-39-50-05-00_arc-easy_NwmTEw6C8VSCXzzwZCFy48.json", |
|
"gsm8k": "2024-11-04T15-22-21-05-00_gsm8k_hdJs3Z6XzpR5netTcWLXJT.json", |
|
"mmlu": "2024-11-04T16-26-13-05-00_mmlu_QvfQ46qJen2bvxiktHu86H.json", |
|
"gdm_intercode_ctf": "2024-11-15T20-52-53-05-00_gdm-intercode-ctf_oLYr3H6bFtrcmgM6EABmNt.json" |
|
}, |
|
"o1": { |
|
"winogrande": "2025-01-20T16-46-06-05-00_winogrande_YUtAdEsForRffqe4Sm3wtR.json", |
|
"humaneval": "2025-01-17T14-59-12-05-00_humaneval_RRL8GMy9NakTxUHsDVWNng.json", |
|
"mmmu_open": "2025-01-20T22-48-09-05-00_mmmu-open_oBzxJBYbvnktbbAwhoCrYK.json", |
|
"mmlu_pro": "2025-01-20T14-02-37-05-00_mmlu-pro_EvDzvqaahQwhv6fJovN4BT.json", |
|
"math": "2025-01-17T15-03-22-05-00_math_6BbvHFF8hLMsVYozyNLbyQ.json", |
|
"arc_easy": "2025-01-17T11-29-26-05-00_arc-easy_DFbir4BdgQDbKd52r7tRKR.json", |
|
"arc_challenge": "2025-01-17T11-44-42-05-00_arc-challenge_PsWXaBqrgv3EcTZC55gRzJ.json", |
|
"gsm8k": "2025-01-17T12-56-38-05-00_gsm8k_iD8275qeyNTgX523pn45bF.json", |
|
"gpqa_diamond": "2025-01-17T11-53-53-05-00_gpqa-diamond_EJV7ULFSQLRoFTEqsv3t6q.json", |
|
"hellaswag": "2025-01-17T13-14-39-05-00_hellaswag_73sQJFnwpzWjTvEqKjUk4M.json", |
|
"mmmu_multiple_choice": "2025-01-20T21-04-57-05-00_mmmu-multiple-choice_MctxjookaeTLCL8KpUeazT.json" |
|
}, |
|
"claude-3-5-sonnet-20241022": { |
|
"mmmu_multiple_choice": "2025-01-21T11-20-03-05-00_mmmu-multiple-choice_CWhKvGdoFo6pdHhDyi9GNm.json", |
|
"mmlu_pro": "2025-01-16T19-01-05-05-00_mmlu-pro_3vi84or97gQupuj5sT6vgZ.json", |
|
"hellaswag": "2025-01-15T15-09-33-05-00_hellaswag_QXqFxojvSToMu8ckHEMLkB.json", |
|
"gpqa_diamond": "2025-01-15T13-56-36-05-00_gpqa-diamond_eg4gFaMRENjnnYvQNtSB59.json", |
|
"gsm8k": "2025-01-15T14-23-25-05-00_gsm8k_nHB8Z4uZAwRAZFYpKmTptA.json", |
|
"mmmu_open": "2025-01-21T11-24-21-05-00_mmmu-open_SSjv3Dq9gZkEEUnvJUd5xf.json", |
|
"arc_easy": "2025-01-15T10-06-24-05-00_arc-easy_oBReQZQM5SAwMMD2jFshPb.json", |
|
"arc_challenge": "2025-01-15T10-12-11-05-00_arc-challenge_X8i6caCzkcQo5AT5zXkXso.json", |
|
"mmlu": "2025-01-16T15-16-51-05-00_mmlu_NFDs2kxmh3kQEbpbd8sz3w.json", |
|
"math": "2025-01-16T12-29-54-05-00_math_NvNQU58M8r3fpiwPGnvq8h.json", |
|
"ifeval": "2025-01-16T11-28-44-05-00_ifeval_fmWxch4ZjbmYCST6yUZsdV.json", |
|
"humaneval": "2025-01-16T11-26-12-05-00_humaneval_kUASiaNd9uZfWvCwYHhdF5.json", |
|
"winogrande": "2025-01-16T22-09-41-05-00_winogrande_mSWGAKg75E5RP79KWizvb9.json", |
|
"drop": "2025-01-15T10-15-15-05-00_drop_Z9A2Y84HYponNxnzNT9TNq.json" |
|
}, |
|
"c4ai-command-r-plus": { |
|
"ifeval": "2024-10-30T17-23-04-04-00_ifeval_RGucUMwdGmUnRpqyMTZTzW.json", |
|
"winogrande": "2024-10-30T14-42-18-04-00_winogrande_bY8yg7aRR5dCCK7NDCZEcc.json", |
|
"arc_challenge": "2024-10-29T17-30-03-04-00_arc-challenge_XB7LURXEGaxskWuLtYwdnW.json", |
|
"drop": "2024-10-30T12-06-30-04-00_drop_itY9cLiYAW2BF7NTeDceNd.json", |
|
"math": "2024-10-30T17-26-34-04-00_math_kohBUMpMFuMsR4jz4vUNWM.json", |
|
"gpqa_diamond": "2024-10-29T22-47-45-04-00_gpqa-diamond_JKpb6ya4pec9hh7uovPPCZ.json", |
|
"mmlu_pro": "2024-10-31T01-11-38-04-00_mmlu-pro_gZVAuy3zMKR23BieM5PqAX.json", |
|
"humaneval": "2024-10-30T17-22-23-04-00_humaneval_5ByPqUhoofSbKgvsUQNFCX.json", |
|
"gsm8k": "2024-10-30T15-03-35-04-00_gsm8k_QxbfbriJsKGQAg96JyjkoT.json", |
|
"hellaswag": "2024-10-30T15-18-17-04-00_hellaswag_UYyBTR6N8VJnKRmnbCrB8N.json", |
|
"mmlu": "2024-10-30T21-55-26-04-00_mmlu_JUPPLTzfe3Kme6UuorPTqg.json", |
|
"arc_easy": "2024-10-29T17-10-40-04-00_arc-easy_UvprihBMLXPF8JENVLRkdx.json" |
|
}, |
|
"gpt-4o-mini": { |
|
"drop": "2024-10-30T17-36-25-04-00_drop_6TzJGqqEkpFUCxGD4QejV6.json", |
|
"humaneval": "2024-10-30T21-14-41-04-00_humaneval_Z9aXdUERuwYxoTheZ5GANC.json", |
|
"gpqa_diamond": "2024-10-30T19-31-26-04-00_gpqa-diamond_7aNe9wQiQKpNN96mfaWBPg.json", |
|
"mmmu_open": "2025-01-20T23-13-27-05-00_mmmu-open_GWi6XNYUSLq99BdabtScGm.json", |
|
"arc_challenge": "2024-10-30T17-34-51-04-00_arc-challenge_FbGgLswBZbRE4EhWiMyRt6.json", |
|
"mmlu": "2024-10-31T10-49-43-04-00_mmlu_oGb9mspeGbYS2gfbkknskN.json", |
|
"hellaswag": "2024-10-30T19-35-34-04-00_hellaswag_2SAz3cvMpDxFaApdHDR3s4.json", |
|
"ifeval": "2024-10-30T21-15-06-04-00_ifeval_nYs9KujQMQjcpbpbLtVx8G.json", |
|
"mmmu_multiple_choice": "2025-01-20T23-10-01-05-00_mmmu-multiple-choice_c5rLkrXkV83udX6DVJui5F.json", |
|
"arc_easy": "2024-10-30T17-29-56-04-00_arc-easy_XcEzqqPqJsRV29NqYDfnNo.json", |
|
"winogrande": "2024-10-31T00-59-07-04-00_winogrande_Ci55vHvbGGW38zVpMCwtWa.json", |
|
"mmlu_pro": "2024-10-30T22-43-30-04-00_mmlu-pro_Dc2uu3EV7MJtjg6gg5Y9qH.json", |
|
"math": "2024-10-30T21-15-49-04-00_math_YsWdRzpqMq2dqQ9SPKfack.json", |
|
"gsm8k": "2024-10-30T19-32-39-04-00_gsm8k_nLSssETKDDWNktAFWnVwfv.json" |
|
}, |
|
"Meta-Llama-3.1-70B-Instruct": { |
|
"hellaswag": "2024-10-30T00-45-54-04-00_hellaswag_BKfQG9yGAr383MGnooMLBH.json", |
|
"drop": "2024-10-29T21-01-02-04-00_drop_LzAWvLWkNrNKu5qf56wXRo.json", |
|
"gpqa_diamond": "2024-10-29T23-41-39-04-00_gpqa-diamond_TdLdYmVM6GCVMAECcXkuhj.json", |
|
"winogrande": "2024-10-30T09-20-56-04-00_winogrande_WnUgkSRhSMvh3zUjnuJWQZ.json", |
|
"gsm8k": "2024-10-30T00-03-31-04-00_gsm8k_bKsUfCAfcmBCeryboNaLoX.json", |
|
"math": "2024-10-30T02-34-50-04-00_math_2xiNcrGih26uzJdG4q88bM.json", |
|
"ifeval": "2024-10-30T02-29-32-04-00_ifeval_Dwh3CF2ZYFrvw7UcTwrsvK.json", |
|
"arc_challenge": "2024-10-29T20-58-56-04-00_arc-challenge_oFL5wFjT7KwNFhMFfe72JN.json", |
|
"arc_easy": "2024-10-29T20-53-12-04-00_arc-easy_UXzR7cDeNteP39NoXUYnhm.json", |
|
"mmlu_pro": "2024-10-30T06-11-16-04-00_mmlu-pro_oQiEBJdeKtEEt4cm9KL7uy.json", |
|
"humaneval": "2024-10-30T02-28-25-04-00_humaneval_KcJV2rHuHJ2JLxijihEkcW.json", |
|
"mmlu": "2024-10-30T03-51-50-04-00_mmlu_6SNjs2QmPRvqGnvbnNtaqb.json" |
|
}, |
|
"gpt-4o": { |
|
"gpqa_diamond": "2024-10-31T03-29-33-04-00_gpqa-diamond_nFmRv5MJiYjHjezmq4V6Va.json", |
|
"arc_challenge": "2024-10-31T01-45-55-04-00_arc-challenge_nrsPPxh4DpzgLPQDFdcfVp.json", |
|
"gsm8k": "2024-10-31T03-31-16-04-00_gsm8k_jVXeSvHowbietZCFsFYCwB.json", |
|
"mmlu": "2024-10-31T10-49-43-04-00_mmlu_GarLpfQFSpM3C22nbbGp54.json", |
|
"ifeval": "2024-10-31T05-00-11-04-00_ifeval_jxreUu8JqRdkrcHP4E3hLR.json", |
|
"mmlu_pro": "2024-10-31T06-59-42-04-00_mmlu-pro_EuAKDwAWSfNVpqyyqrf2Ba.json", |
|
"mmmu_open": "2025-01-20T23-07-46-05-00_mmmu-open_d3Q2HvuPZzEX6FAM4NBhnp.json", |
|
"winogrande": "2024-10-31T09-02-03-04-00_winogrande_44kKF7M9mKoqVC7ixZVXuq.json", |
|
"drop": "2024-10-31T01-47-20-04-00_drop_3gxDcn6vUoR3nvHX9BcSq4.json", |
|
"arc_easy": "2024-10-31T01-41-34-04-00_arc-easy_nUavRHdiRVfrxo6dmCPadh.json", |
|
"mmmu_multiple_choice": "2025-01-20T23-03-21-05-00_mmmu-multiple-choice_eoycAFLMirSqiURdXmBP2e.json", |
|
"humaneval": "2024-10-31T04-59-42-04-00_humaneval_nmJcd84CcNKjWS8fBfMbZM.json", |
|
"math": "2024-10-31T05-01-22-04-00_math_cDSpKPp3nLrFy8uYfYKEbM.json", |
|
"hellaswag": "2024-10-31T03-33-47-04-00_hellaswag_JNnnPuz3dhZRpyXzizMUBF.json" |
|
}, |
|
"Mistral-Large-Instruct-2407": { |
|
"drop": "2024-10-31T01-56-12-04-00_drop_NtvuCoU2LoMbH8DztcCTen.json", |
|
"ifeval": "2024-10-31T06-30-16-04-00_ifeval_TLkvCSFEWo4PLv6hAha7YB.json", |
|
"mmlu": "2024-10-31T07-21-48-04-00_mmlu_YnUhmHoStr3WuJdchWmNPt.json", |
|
"gpqa_diamond": "2024-10-31T04-22-52-04-00_gpqa-diamond_SuZUZxGdqS2ZecbLRNkKd4.json", |
|
"gsm8k": "2024-10-31T04-28-49-04-00_gsm8k_5tQp9tbwUMj6NpjNKCAfVm.json", |
|
"math": "2024-10-31T06-33-09-04-00_math_2CmjBedAfUxqvmcHRdBgyB.json", |
|
"arc_easy": "2024-10-31T01-48-39-04-00_arc-easy_YbfuBT3usZXt2xgZkkR5dq.json", |
|
"mmlu_pro": "2024-10-31T09-41-25-04-00_mmlu-pro_fyYT4aabPesfY5TpzFMPnd.json", |
|
"humaneval": "2024-10-31T06-29-24-04-00_humaneval_nu8SUSGekKJWB8HLKDigYK.json", |
|
"hellaswag": "2024-10-31T04-50-00-04-00_hellaswag_ZzQoZ6gkRQsTzMhQr7GYNn.json", |
|
"arc_challenge": "2024-10-31T01-54-13-04-00_arc-challenge_WfQRhMkFcywefpU46isBVP.json", |
|
"winogrande": "2024-10-31T11-57-58-04-00_winogrande_TP3UGwpp37Dyv6ks9Ty5Hk.json" |
|
}, |
|
"Qwen2.5-72B-Instruct": { |
|
"arc_challenge": "2024-10-31T13-46-34-04-00_arc-challenge_FSybKYYwpXVLQag8VwpjKe.json", |
|
"mmlu_pro": "2024-11-01T20-31-04-04-00_mmlu-pro_2TfSPmsVmKatntHy2CnR7A.json", |
|
"gpqa_diamond": "2024-10-31T13-48-32-04-00_gpqa-diamond_8qSySicySUyNvRRYVFBKLU.json", |
|
"winogrande": "2024-10-31T14-46-29-04-00_winogrande_CX692dYh53gJ6JigT9GMpa.json", |
|
"mmlu": "2024-11-01T10-08-50-04-00_mmlu_AgK27yYvmAo2LxotBH7ZL9.json", |
|
"hellaswag": "2024-11-01T02-55-55-04-00_hellaswag_RSk8rGcQWg3HRrLffTNoiM.json", |
|
"gsm8k": "2024-11-01T01-15-16-04-00_gsm8k_3h4W6xZjXpz9oCwtgKNYzo.json", |
|
"arc_easy": "2024-10-31T13-40-08-04-00_arc-easy_3JUyzfoEHxhSBUdCU2AaVC.json", |
|
"math": "2024-11-01T10-06-46-04-00_math_UUpS2R9eQc9KxBxkanT2gE.json", |
|
"ifeval": "2024-10-31T14-51-45-04-00_ifeval_VGxA7gTZLZSruceM9Ci37C.json", |
|
"humaneval": "2024-10-31T14-49-39-04-00_humaneval_9u7khnxivCDroJoPNRFpjs.json", |
|
"drop": "2024-10-31T15-03-20-04-00_drop_DDLi98VhiV2bLzuw7fx6H4.json" |
|
} |
|
} |