benchbench / cache /aggregate_scoress_cache_230b5a189df3af8cde801ce251b7b2ef.csv
Yotam-Perlitz
revise cache
292d764
raw
history blame
2.79 kB
model,score
claude_3_5_sonnet_20240620,1.0
gpt_4o_2024_08_06,0.9855072463768116
chatgpt_4o_latest,0.9710144927536232
gpt_4o_2024_05_13,0.9565217391304348
llama3.1_405b_instruct_turbo,0.9420289855072463
gemini_1.5_pro_exp_0827,0.927536231884058
gpt_4_turbo_2024_04_09,0.9130434782608695
gemini_1.5_pro_exp_0801,0.8985507246376812
claude_3_opus_20240229,0.8840579710144928
gpt_4_0125_preview,0.8695652173913043
dracarys_llama3.1_70b_instruct,0.855072463768116
llama3.1_70b_instruct_turbo,0.8405797101449275
mistral_large_2407,0.8260869565217391
gemini_1.5_flash_exp_0827,0.8115942028985508
deepseek_coder_v2,0.7971014492753623
deepseek_chat_v2,0.782608695652174
gpt_4_0613,0.7681159420289855
gemini_1.5_pro_api_0514,0.7536231884057971
gpt_4o_mini_2024_07_18,0.7391304347826086
gemma_2_27b_it,0.7246376811594203
dracarys_72b_instruct,0.7101449275362319
qwen2_72b_instruct,0.6956521739130435
hermes_3_llama3.1_70b,0.6811594202898551
gemini_1.5_flash_api_0514,0.6666666666666666
smaug_qwen2_72b_instruct,0.6521739130434783
mistral_large_2402,0.6376811594202898
claude_3_sonnet_20240229,0.6231884057971014
llama3_70b_instruct,0.6086956521739131
claude_3_haiku_20240307,0.5942028985507246
mixtral_8x22b_instruct_v0.1,0.5797101449275363
phi_3.5_moe_instruct,0.5652173913043478
gpt_3.5_turbo_0125,0.5507246376811594
mistral_small_2402,0.5362318840579711
command_r_plus,0.5217391304347826
gemma_2_9b_it,0.5072463768115942
phi_3_medium_4k_instruct,0.4927536231884058
phi_3_medium_128k_instruct,0.4782608695652174
phi_3_small_128k_instruct,0.463768115942029
qwen1.5_110b_chat,0.4492753623188406
deepseek_coder_v2_lite_instruct,0.43478260869565216
qwen1.5_72b_chat,0.42028985507246375
open_mistral_nemo,0.4057971014492754
phi_3.5_mini_instruct,0.391304347826087
llama3.1_8b_instruct_turbo,0.37681159420289856
phi_3_small_8k_instruct,0.36231884057971014
llama3_8b_instruct,0.34782608695652173
command_r,0.3333333333333333
qwen2_7b_instruct,0.3188405797101449
phi_3_mini_128k_instruct,0.30434782608695654
phi_3_mini_4k_instruct,0.2898550724637681
mathstral_7b_v0.1,0.2753623188405797
openhermes_2.5_mistral_7b,0.2608695652173913
mixtral_8x7b_instruct_v0.1,0.2463768115942029
mistral_7b_instruct_v0.3,0.2318840579710145
mistral_7b_instruct_v0.2,0.21739130434782608
gemma_1.1_7b_it,0.2028985507246377
zephyr_7b_alpha,0.18840579710144928
qwen1.5_7b_chat,0.17391304347826086
deepseek_v2_lite_chat,0.15942028985507245
zephyr_7b_beta,0.14492753623188406
starling_lm_7b_beta,0.13043478260869565
vicuna_7b_v1.5_16k,0.11594202898550725
vicuna_7b_v1.5,0.10144927536231885
llama_2_7b_chat,0.08695652173913043
qwen1.5_4b_chat,0.07246376811594203
qwen2_1.5b_instruct,0.057971014492753624
yi_6b_chat,0.043478260869565216
qwen2_0.5b_instruct,0.028985507246376812
qwen1.5_1.8b_chat,0.014492753623188406
qwen1.5_0.5b_chat,0.0