Sebastian Deatc committed on
Commit
dee6070
·
verified ·
1 Parent(s): 5441bc5

Upload sorted_results.csv

Browse files
Files changed (1) hide show
  1. sorted_results.csv +59 -0
sorted_results.csv ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,A%,W%,H%,R%
2
+ google/gemma-2-27b-it,0.023575638506876228,0.049901768172888016,0.8681728880157171,0.05834970530451866
3
+ google/gemma-2-9b-it,0.03143418467583497,0.04754420432220039,0.8774066797642436,0.04361493123772102
4
+ meta-llama/llama-2-70b-chat,0.03929273084479371,0.021611001964636542,0.8683693516699411,0.07072691552062868
5
+ meta-llama/llama-2-13b-chat,0.053241650294695485,0.0243614931237721,0.6774066797642436,0.2449901768172888
6
+ google/gemini-flash-1.5,0.05363457760314342,0.06522593320235756,0.8721021611001964,0.009037328094302554
7
+ google/gemini-pro,0.05933202357563851,0.08369351669941061,0.28447937131630646,0.5724950884086444
8
+ microsoft/wizardlm-2-8x22b,0.07701375245579568,0.3341846758349705,0.5829076620825148,0.005893909626719057
9
+ anthropic/claude-3-haiku:beta,0.07760314341846758,0.07583497053045186,0.4151277013752456,0.431434184675835
10
+ qwen/qwen-2-72b-instruct,0.08133595284872298,0.1324165029469548,0.5320235756385069,0.2542239685658153
11
+ meta-llama/codellama-34b-instruct,0.08447937131630648,0.07858546168958742,0.7445972495088409,0.09233791748526522
12
+ microsoft/phi-3-medium-4k-instruct,0.09646365422396856,0.1638506876227898,0.21591355599214146,0.5237721021611002
13
+ deepseek/deepseek-chat,0.10039292730844794,0.2430255402750491,0.46149312377210217,0.19508840864440077
14
+ perplexity/llama-3.1-sonar-large-128k-chat,0.10039292730844794,0.2082514734774067,0.4534381139489195,0.23791748526522594
15
+ deepseek/deepseek-coder,0.10569744597249509,0.27838899803536343,0.3155206286836935,0.3003929273084479
16
+ perplexity/llama-3-sonar-small-32k-chat,0.10805500982318271,0.07662082514734773,0.07662082514734773,0.7387033398821218
17
+ meta-llama/llama-3.1-8b-instruct,0.10943025540275049,0.1031434184675835,0.03791748526522593,0.74950884086444
18
+ meta-llama/llama-3-70b-instruct,0.11335952848722987,0.2444007858546169,0.13909626719056975,0.5031434184675835
19
+ microsoft/phi-3-mini-128k-instruct,0.11473477406679765,0.17701375245579568,0.46149312377210217,0.24675834970530452
20
+ microsoft/phi-3-medium-128k-instruct,0.11846758349705304,0.2206286836935167,0.16267190569744597,0.49823182711198427
21
+ meta-llama/llama-3.1-70b-instruct,0.12337917485265226,0.21237721021611003,0.08055009823182711,0.5836935166994106
22
+ qwen/qwen-2-7b-instruct,0.12396856581532416,0.17917485265225933,0.28349705304518663,0.41335952848722984
23
+ meta-llama/llama-3-8b-instruct,0.1255402750491159,0.13536345776031433,0.0650294695481336,0.6740667976424362
24
+ perplexity/llama-3.1-sonar-small-128k-chat,0.12888015717092338,0.24165029469548133,0.31237721021611004,0.31709233791748526
25
+ databricks/dbrx-instruct,0.15658153241650294,0.29882121807465617,0.2650294695481336,0.27956777996070725
26
+ qwen/qwen-7b-chat,0.15913555992141454,0.3182711198428291,0.4793713163064833,0.043222003929273084
27
+ mistralai/mixtral-8x22b-instruct,0.16345776031434184,0.3593320235756385,0.3948919449901768,0.08231827111984283
28
+ recursal/eagle-7b,0.1667976424361493,0.08526522593320236,0.030451866404715127,0.7174852652259333
29
+ rwkv/rwkv-5-world-3b,0.16699410609037327,0.08428290766208252,0.029666011787819253,0.7190569744597249
30
+ mistralai/mixtral-8x7b-instruct,0.1831041257367387,0.42455795677799607,0.33909626719056973,0.053241650294695485
31
+ openchat/openchat-7b,0.1893909626719057,0.28978388998035365,0.28919449901768174,0.23163064833005895
32
+ sao10k/l3-euryale-70b,0.1925343811394892,0.5113948919449902,0.2294695481335953,0.06660117878192534
33
+ perplexity/llama-3-sonar-large-32k-chat,0.1944990176817289,0.13555992141453832,0.10412573673870335,0.5658153241650294
34
+ perplexity/llama-3.1-sonar-large-128k-online,0.19646365422396855,0.4950884086444008,0.30648330058939094,0.0019646365422396855
35
+ mistralai/mistral-7b-instruct-v0.3,0.19783889980353633,0.45952848722986245,0.3225933202357564,0.020039292730844795
36
+ google/gemma-7b-it,0.206286836935167,0.1905697445972495,0.3163064833005894,0.2868369351669941
37
+ openchat/openchat-8b,0.22161100196463654,0.34047151277013754,0.24695481335952849,0.19096267190569743
38
+ mistralai/mistral-tiny,0.2225933202357564,0.3703339882121807,0.3609037328094303,0.04616895874263261
39
+ cognitivecomputations/dolphin-mixtral-8x22b,0.22730844793713162,0.38231827111984285,0.2056974459724951,0.18467583497053044
40
+ phind/phind-codellama-34b,0.22986247544204322,0.30648330058939094,0.30844793713163066,0.15520628683693516
41
+ mistralai/mistral-nemo,0.2530451866404715,0.34145383104125737,0.35952848722986247,0.045972495088408644
42
+ cognitivecomputations/dolphin-mixtral-8x7b,0.2989054167836093,0.47207409486387875,0.1644681448217794,0.06455234353073253
43
+ undi95/toppy-m-7b,0.31237721021611004,0.4180746561886051,0.1781925343811395,0.09135559921414538
44
+ jondurbin/airoboros-l2-70b,0.32632612966601177,0.42220039292730843,0.1595284872298625,0.09194499017681729
45
+ lizpreciatior/lzlv-70b-fp16-hf,0.3412573673870334,0.38349705304518666,0.23948919449901768,0.03575638506876228
46
+ nousresearch/nous-hermes-2-mistral-7b-dpo,0.3418467583497053,0.44793713163064836,0.16895874263261296,0.0412573673870334
47
+ mistralai/codestral-mamba,0.3477406679764244,0.381139489194499,0.25147347740667975,0.019646365422396856
48
+ nousresearch/nous-hermes-2-mixtral-8x7b-dpo,0.34931237721021613,0.47387033398821216,0.16031434184675836,0.016502946954813358
49
+ nousresearch/nous-hermes-yi-34b,0.35717092337917483,0.4019646365422397,0.17406679764243616,0.06679764243614932
50
+ teknium/openhermes-2.5-mistral-7b,0.3575638506876228,0.41846758349705304,0.16168958742632614,0.062278978388998034
51
+ 01-ai/yi-34b-chat,0.3614931237721022,0.1512770137524558,0.06679764243614932,0.4204322200392927
52
+ nousresearch/hermes-2-pro-llama-3-8b,0.36345776031434185,0.4638506876227898,0.14931237721021612,0.02337917485265226
53
+ nousresearch/nous-capybara-7b,0.4066797642436149,0.4911591355599214,0.08840864440078586,0.0137524557956778
54
+ open-orca/mistral-7b-openorca,0.41846758349705304,0.4263261296660118,0.1100196463654224,0.04518664047151277
55
+ togethercomputer/stripedhyena-nous-7b,0.43831041257367387,0.41277013752455793,0.12907662082514734,0.019842829076620824
56
+ teknium/openhermes-2-mistral-7b,0.4518664047151277,0.4361493123772102,0.10019646365422397,0.011787819253438114
57
+ undi95/remm-slerp-l2-13b,0.4656188605108055,0.3237721021611002,0.1662082514734774,0.044400785854616896
58
+ google/palm-2-chat-bison-32k,0.47347740667976423,0.3005893909626719,0.13359528487229863,0.09233791748526522
59
+ nousresearch/nous-hermes-llama2-13b,0.5108055009823183,0.3791748526522593,0.09823182711198428,0.011787819253438114