benchbench / cache /aggregate_scoress_cache_1edd56d721757789836f081c38c15400.csv
Yotam-Perlitz
revise cache
292d764
raw
history blame
2.86 kB
model,score
arx_0.3,1.0
claude_3.5_sonnet,0.9876543209876543
grok_2,0.9753086419753086
gpt_4o_2024_05_13,0.9629629629629629
grok_2_mini,0.9506172839506173
gemini_1.5_pro,0.9382716049382716
claude_3_opus,0.9259259259259259
qwen2_72b_chat,0.9135802469135802
magnum_72b_v1,0.9012345679012346
gpt_4_turbo,0.8888888888888888
deepseek_coder_v2_instruct,0.8765432098765432
higgs_llama3_70b,0.8641975308641975
gpt_4o_mini,0.8518518518518519
llama3.1_70b_instruct,0.8395061728395061
gemini_1.5_flash,0.8271604938271605
yi_large,0.8148148148148148
claude_3_sonnet,0.8024691358024691
llama3_70b_instruct,0.7901234567901234
phi3_medium_4k,0.7777777777777778
qwen2_72b_32k,0.7654320987654321
deepseek_v2_chat,0.7530864197530864
llama3_70b,0.7407407407407407
qwen1.5_72b_chat,0.7283950617283951
llama3.1_70b,0.7160493827160493
yi_1.5_34b_chat,0.7037037037037037
gemma_2_9b_it,0.691358024691358
phi3_medium_128k,0.6790123456790124
mammoth2_8x7b_plus,0.6666666666666666
qwen1.5_110b,0.654320987654321
glm_4_9b_chat,0.6419753086419753
glm_4_9b,0.6296296296296297
phi_3.5_mini_instruct,0.6172839506172839
qwen2_7b_instruct,0.6049382716049383
yi_1.5_9b_chat,0.5925925925925926
phi3_mini_4k,0.5802469135802469
gemma_2_9b,0.5679012345679012
mistral_nemo_instruct_2407,0.5555555555555556
llama3.1_8b_instruct,0.5432098765432098
phi3_mini_128k,0.5308641975308642
mammoth2_8b_plus,0.5185185185185185
mixtral_8x7b_instruct_v0.1,0.5061728395061729
yi_34b,0.49382716049382713
mathstral_7b_v0.1,0.48148148148148145
deepseek_coder_v2_lite_instruct,0.4691358024691358
mixtral_8x7b_v0.1,0.4567901234567901
llama3_8b_instruct,0.4444444444444444
mammoth2_7b_plus,0.43209876543209874
qwen2_7b,0.41975308641975306
mistral_nemo_base_2407,0.4074074074074074
wizardlm_2_8x22b,0.3950617283950617
yi_1.5_6b_chat,0.38271604938271603
qwen1.5_14b_chat,0.37037037037037035
c4ai_command_r_v01,0.345679012345679
staring_7b,0.345679012345679
llama_2_70b,0.3333333333333333
openchat_3.5_8b,0.32098765432098764
internmath_20b_plus,0.30864197530864196
llama3_smaug_8b,0.2962962962962963
llama3.1_8b,0.2839506172839506
llama3_8b,0.2716049382716049
deepseekmath_7b_instruct,0.25925925925925924
deepseek_coder_v2_lite_base,0.24691358024691357
gemma_7b,0.2345679012345679
internmath_7b_plus,0.2222222222222222
zephyr_7b_beta,0.20987654320987653
mistral_7b_v0.1,0.19753086419753085
mistral_7b_instruct_v0.2,0.18518518518518517
mistral_7b_v0.2,0.1728395061728395
qwen1.5_7b_chat,0.16049382716049382
yi_6b_chat,0.14814814814814814
neo_7b_instruct,0.13580246913580246
yi_6b,0.12345679012345678
neo_7b,0.1111111111111111
mistral_7b_instruct_v0.1,0.09876543209876543
llama_2_13b,0.08641975308641975
llemma_7b,0.07407407407407407
qwen2_1.5b_instruct,0.06172839506172839
qwen2_1.5b,0.04938271604938271
llama_2_7b,0.037037037037037035
qwen2_0.5b_instruct,0.024691358024691357
gemma_2b,0.012345679012345678
qwen2_0.5b,0.0