{
  "qwen2.5-3b-instruct": {
    "success_rate": 0.0624,
    "relative_action_count": 2.4255102697302697
  },
  "llama-3.2-3b-instruct": {
    "success_rate": 0.064,
    "relative_action_count": 2.4438042524142523
  },
  "llama-3.1-8b-instruct": {
    "success_rate": 0.11599999999999999,
    "relative_action_count": 2.3321907026307027
  },
  "gpt-3.5-turbo": {
    "success_rate": 0.12079999999999999,
    "relative_action_count": 2.34957508047508
  },
  "gemma-2-9b-it": {
    "success_rate": 0.132,
    "relative_action_count": 2.3394684981684977
  },
  "qwen2.5-7b-instruct": {
    "success_rate": 0.1664,
    "relative_action_count": 2.3259762459762454
  },
  "gemma-2-27b-it": {
    "success_rate": 0.1696,
    "relative_action_count": 2.28467764013764
  },
  "llama-3.1-70b-instruct": {
    "success_rate": 0.256,
    "relative_action_count": 1.9653564912864916
  },
  "yi-lightning": {
    "success_rate": 0.30720000000000003,
    "relative_action_count": 2.031278719058719
  },
  "gpt-4o-mini": {
    "success_rate": 0.31040000000000006,
    "relative_action_count": 1.9804984304584305
  },
  "llama-3.3-70b-instruct": {
    "success_rate": 0.33840000000000003,
    "relative_action_count": 1.90917626040626
  },
  "claude-3.5-haiku": {
    "success_rate": 0.3592000000000001,
    "relative_action_count": 2.0113219180819177
  },
  "gemini-1.5-pro": {
    "success_rate": 0.36879999999999996,
    "relative_action_count": 1.9371788544788544
  },
  "qwen2.5-14b-instruct": {
    "success_rate": 0.3816,
    "relative_action_count": 1.9383408547008547
  },
  "qwen2.5-72b-instruct": {
    "success_rate": 0.4008,
    "relative_action_count": 1.8648658674658674
  },
  "mistral-large-instruct-2411": {
    "success_rate": 0.4144,
    "relative_action_count": 1.795764299034299
  },
  "qwen2.5-32b-instruct": {
    "success_rate": 0.43920000000000003,
    "relative_action_count": 1.8831460717060717
  },
  "claude-3.5-sonnet": {
    "success_rate": 0.44000000000000006,
    "relative_action_count": 1.6636790032190032
  },
  "gpt-4o": {
    "success_rate": 0.44960000000000006,
    "relative_action_count": 1.7164597657897656
  },
  "deepseek-v3": {
    "success_rate": 0.5496000000000001,
    "relative_action_count": 1.705338828948829
  },
  "deepseek-r1": {
    "success_rate": 0.6112,
    "relative_action_count": 1.4205231568431569
  },
  "qwq-32b": {
    "success_rate": 0.6112,
    "relative_action_count": 1.5151790675990677
  },
  "o1-mini": {
    "success_rate": 0.6296,
    "relative_action_count": 1.4230264535464534
  }
}