[ { "model_name": "claude-3-7-sonnet-20250219", "results": { "mmlu_results": [], "unified_exam_results": [ { "category": "Average", "score": 11.0833 }, { "category": "Armenian language and literature", "score": 10.5 }, { "category": "Armenian history", "score": 7.75 }, { "category": "Mathematics", "score": 15.0 } ] } }, { "model_name": "claude-3-5-sonnet-20241022", "results": { "mmlu_results": [ { "category": "Average", "score": 0.6958 }, { "category": "Biology", "score": 0.8667 }, { "category": "Business", "score": 0.803 }, { "category": "Chemistry", "score": 0.7579 }, { "category": "Computer Science", "score": 0.7059 }, { "category": "Economics", "score": 0.7887 }, { "category": "Engineering", "score": 0.5625 }, { "category": "Health", "score": 0.6618 }, { "category": "History", "score": 0.6552 }, { "category": "Law", "score": 0.4944 }, { "category": "Math", "score": 0.7788 }, { "category": "Other", "score": 0.6494 }, { "category": "Philosophy", "score": 0.5476 }, { "category": "Physics", "score": 0.7523 }, { "category": "Psychology", "score": 0.7164 } ], "unified_exam_results": [ { "category": "Average", "score": 10.6667 }, { "category": "Armenian language and literature", "score": 10.0 }, { "category": "Armenian history", "score": 9.25 }, { "category": "Mathematics", "score": 12.75 } ] } }, { "model_name": "gemini-2.0-flash", "results": { "mmlu_results": [ { "category": "Average", "score": 0.7247 }, { "category": "Biology", "score": 0.85 }, { "category": "Business", "score": 0.8182 }, { "category": "Chemistry", "score": 0.7895 }, { "category": "Computer Science", "score": 0.7353 }, { "category": "Economics", "score": 0.8169 }, { "category": "Engineering", "score": 0.6 }, { "category": "Health", "score": 0.75 }, { "category": "History", "score": 0.5517 }, { "category": "Law", "score": 0.5281 }, { "category": "Math", "score": 0.8673 }, { "category": "Other", "score": 0.6364 }, { "category": "Philosophy", "score": 0.6429 }, { "category": "Physics", "score": 0.7982 }, { "category": "Psychology", "score": 0.7612 } ], "unified_exam_results": [ { "category": "Average", "score": 9.8333 }, { "category": "Armenian language and literature", "score": 5.5 }, { "category": "Armenian history", "score": 6.75 }, { "category": "Mathematics", "score": 17.25 } ] } }, { "model_name": "gpt-4o", "results": { "mmlu_results": [ { "category": "Average", "score": 0.6758 }, { "category": "Biology", "score": 0.8667 }, { "category": "Business", "score": 0.7424 }, { "category": "Chemistry", "score": 0.6842 }, { "category": "Computer Science", "score": 0.6176 }, { "category": "Economics", "score": 0.7887 }, { "category": "Engineering", "score": 0.5625 }, { "category": "Health", "score": 0.7794 }, { "category": "History", "score": 0.5517 }, { "category": "Law", "score": 0.5393 }, { "category": "Math", "score": 0.7788 }, { "category": "Other", "score": 0.5974 }, { "category": "Philosophy", "score": 0.5476 }, { "category": "Physics", "score": 0.6881 }, { "category": "Psychology", "score": 0.7164 } ], "unified_exam_results": [ { "category": "Average", "score": 8.9167 }, { "category": "Armenian language and literature", "score": 6.75 }, { "category": "Armenian history", "score": 6.75 }, { "category": "Mathematics", "score": 13.25 } ] } }, { "model_name": "qwen-max-2025-01-25", "results": { "mmlu_results": [], "unified_exam_results": [ { "category": "Average", "score": 8.6667 }, { "category": "Armenian language and literature", "score": 7.25 }, { "category": "Armenian history", "score": 4.5 }, { "category": "Mathematics", "score": 14.25 } ] } }, { "model_name": "gemini-1.5-flash", "results": { "mmlu_results": [ { "category": "Average", "score": 0.5592 }, { "category": "Biology", "score": 0.75 }, { "category": "Business", "score": 0.7121 }, { "category": "Chemistry", "score": 0.6947 }, { "category": "Computer Science", "score": 0.5 }, { "category": "Economics", "score": 0.7183 }, { "category": "Engineering", "score": 0.4 }, { "category": "Health", "score": 0.5 }, { "category": "History", "score": 0.4483 }, { "category": "Law", "score": 0.2584 }, { "category": "Math", "score": 0.8319 }, { "category": "Other", "score": 0.3506 }, { "category": "Philosophy", "score": 0.3571 }, { "category": "Physics", "score": 0.6514 }, { "category": "Psychology", "score": 0.6567 } ], "unified_exam_results": [ { "category": "Average", "score": 7.8333 }, { "category": "Armenian language and literature", "score": 4.75 }, { "category": "Armenian history", "score": 3.75 }, { "category": "Mathematics", "score": 15.0 } ] } }, { "model_name": "DeepSeek-V3", "results": { "mmlu_results": [ { "category": "Average", "score": 0.6633 }, { "category": "Biology", "score": 0.8167 }, { "category": "Business", "score": 0.8182 }, { "category": "Chemistry", "score": 0.6947 }, { "category": "Computer Science", "score": 0.7353 }, { "category": "Economics", "score": 0.7887 }, { "category": "Engineering", "score": 0.5875 }, { "category": "Health", "score": 0.6471 }, { "category": "History", "score": 0.4828 }, { "category": "Law", "score": 0.3596 }, { "category": "Math", "score": 0.8584 }, { "category": "Other", "score": 0.5455 }, { "category": "Philosophy", "score": 0.5476 }, { "category": "Physics", "score": 0.6881 }, { "category": "Psychology", "score": 0.7164 } ], "unified_exam_results": [ { "category": "Average", "score": 7.5 }, { "category": "Armenian language and literature", "score": 5.25 }, { "category": "Armenian history", "score": 5.0 }, { "category": "Mathematics", "score": 12.25 } ] } }, { "model_name": "Meta-Llama-3.3-70B-Instruct", "results": { "mmlu_results": [ { "category": "Average", "score": 0.5139 }, { "category": "Biology", "score": 0.7333 }, { "category": "Business", "score": 0.5303 }, { "category": "Chemistry", "score": 0.5895 }, { "category": "Computer Science", "score": 0.3824 }, { "category": "Economics", "score": 0.6338 }, { "category": "Engineering", "score": 0.4875 }, { "category": "Health", "score": 0.5735 }, { "category": "History", "score": 0.4138 }, { "category": "Law", "score": 0.3146 }, { "category": "Math", "score": 0.6018 }, { "category": "Other", "score": 0.3377 }, { "category": "Philosophy", "score": 0.4524 }, { "category": "Physics", "score": 0.5321 }, { "category": "Psychology", "score": 0.6119 } ], "unified_exam_results": [ { "category": "Average", "score": 7.0833 }, { "category": "Armenian language and literature", "score": 4.5 }, { "category": "Armenian history", "score": 5.25 }, { "category": "Mathematics", "score": 11.5 } ] } }, { "model_name": "claude-3-5-haiku-20241022", "results": { "mmlu_results": [ { "category": "Average", "score": 0.5198 }, { "category": "Biology", "score": 0.75 }, { "category": "Business", "score": 0.5758 }, { "category": "Chemistry", "score": 0.5579 }, { "category": "Computer Science", "score": 0.4412 }, { "category": "Economics", "score": 0.6901 }, { "category": "Engineering", "score": 0.4125 }, { "category": "Health", "score": 0.5882 }, { "category": "History", "score": 0.5172 }, { "category": "Law", "score": 0.2472 }, { "category": "Math", "score": 0.6018 }, { "category": "Other", "score": 0.3636 }, { "category": "Philosophy", "score": 0.4048 }, { "category": "Physics", "score": 0.5596 }, { "category": "Psychology", "score": 0.5672 } ], "unified_exam_results": [ { "category": "Average", "score": 6.5 }, { "category": "Armenian language and literature", "score": 5.0 }, { "category": "Armenian history", "score": 3.75 }, { "category": "Mathematics", "score": 10.75 } ] } } ]