Upload organize_model_results.json with huggingface_hub
Browse files- organize_model_results.json +46 -9
organize_model_results.json
CHANGED
@@ -20,7 +20,11 @@
|
|
20 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.0,
|
21 |
"Qwen2-Audio-7B-Instruct": 0.5685405990059489,
|
22 |
"whisper_large_v3": 0.8294532718704128,
|
23 |
-
"
|
|
|
|
|
|
|
|
|
24 |
}
|
25 |
},
|
26 |
"wavcaps_test": {
|
@@ -895,13 +899,21 @@
|
|
895 |
"Qwen-Audio-Chat": 38.5,
|
896 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
|
897 |
"Qwen2-Audio-7B-Instruct": 44.4,
|
898 |
-
"
|
|
|
|
|
|
|
|
|
899 |
},
|
900 |
"llama3_70b_judge": {
|
901 |
"Qwen-Audio-Chat": 53.6,
|
902 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
|
903 |
"Qwen2-Audio-7B-Instruct": 58.9,
|
904 |
-
"
|
|
|
|
|
|
|
|
|
905 |
}
|
906 |
},
|
907 |
"mmau_mini_music": {
|
@@ -909,13 +921,21 @@
|
|
909 |
"Qwen-Audio-Chat": 0.4311377245508982,
|
910 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
|
911 |
"Qwen2-Audio-7B-Instruct": 0.45808383233532934,
|
912 |
-
"
|
|
|
|
|
|
|
|
|
913 |
},
|
914 |
"llama3_70b_judge": {
|
915 |
"Qwen-Audio-Chat": 0.5958083832335329,
|
916 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
|
917 |
"Qwen2-Audio-7B-Instruct": 0.6017964071856288,
|
918 |
-
"
|
|
|
|
|
|
|
|
|
919 |
}
|
920 |
},
|
921 |
"mmau_mini_sound": {
|
@@ -923,13 +943,21 @@
|
|
923 |
"Qwen-Audio-Chat": 0.43543543543543545,
|
924 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
|
925 |
"Qwen2-Audio-7B-Instruct": 0.4744744744744745,
|
926 |
-
"
|
|
|
|
|
|
|
|
|
927 |
},
|
928 |
"llama3_70b_judge": {
|
929 |
"Qwen-Audio-Chat": 0.5945945945945946,
|
930 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
|
931 |
"Qwen2-Audio-7B-Instruct": 0.6306306306306306,
|
932 |
-
"
|
|
|
|
|
|
|
|
|
933 |
}
|
934 |
},
|
935 |
"mmau_mini_speech": {
|
@@ -937,13 +965,21 @@
|
|
937 |
"Qwen-Audio-Chat": 0.2882882882882883,
|
938 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
|
939 |
"Qwen2-Audio-7B-Instruct": 0.3993993993993994,
|
940 |
-
"
|
|
|
|
|
|
|
|
|
941 |
},
|
942 |
"llama3_70b_judge": {
|
943 |
"Qwen-Audio-Chat": 0.4174174174174174,
|
944 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
|
945 |
"Qwen2-Audio-7B-Instruct": 0.5345345345345346,
|
946 |
-
"
|
|
|
|
|
|
|
|
|
947 |
}
|
948 |
},
|
949 |
"imda_part5_30s_sqa_test": {
|
@@ -1070,6 +1106,7 @@
|
|
1070 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 62.599999999999994,
|
1071 |
"Qwen2-Audio-7B-Instruct": 46.2,
|
1072 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 65.4,
|
|
|
1073 |
"WavLLM_fairseq": 49.400000000000006,
|
1074 |
"SALMONN_7B": 24.2,
|
1075 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
|
|
|
20 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.0,
|
21 |
"Qwen2-Audio-7B-Instruct": 0.5685405990059489,
|
22 |
"whisper_large_v3": 0.8294532718704128,
|
23 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.4757667842702995,
|
24 |
+
"phi_4_multimodal_instruct": 1.3868687388941825,
|
25 |
+
"WavLLM_fairseq": 1.2058793232211378,
|
26 |
+
"SALMONN_7B": 0.7757204295537071,
|
27 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.4715562308464886
|
28 |
}
|
29 |
},
|
30 |
"wavcaps_test": {
|
|
|
899 |
"Qwen-Audio-Chat": 38.5,
|
900 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
|
901 |
"Qwen2-Audio-7B-Instruct": 44.4,
|
902 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 46.7,
|
903 |
+
"gemini-1.5-flash": 31.4,
|
904 |
+
"phi_4_multimodal_instruct": 54.50000000000001,
|
905 |
+
"SALMONN_7B": 40.5,
|
906 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 51.0
|
907 |
},
|
908 |
"llama3_70b_judge": {
|
909 |
"Qwen-Audio-Chat": 53.6,
|
910 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
|
911 |
"Qwen2-Audio-7B-Instruct": 58.9,
|
912 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 52.7,
|
913 |
+
"gemini-1.5-flash": 58.199999999999996,
|
914 |
+
"phi_4_multimodal_instruct": 59.4,
|
915 |
+
"SALMONN_7B": 48.4,
|
916 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
|
917 |
}
|
918 |
},
|
919 |
"mmau_mini_music": {
|
|
|
921 |
"Qwen-Audio-Chat": 0.4311377245508982,
|
922 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
|
923 |
"Qwen2-Audio-7B-Instruct": 0.45808383233532934,
|
924 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.39520958083832336,
|
925 |
+
"gemini-1.5-flash": 0.2904191616766467,
|
926 |
+
"phi_4_multimodal_instruct": 0.6377245508982036,
|
927 |
+
"SALMONN_7B": 0.4820359281437126,
|
928 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5
|
929 |
},
|
930 |
"llama3_70b_judge": {
|
931 |
"Qwen-Audio-Chat": 0.5958083832335329,
|
932 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
|
933 |
"Qwen2-Audio-7B-Instruct": 0.6017964071856288,
|
934 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.44011976047904194,
|
935 |
+
"gemini-1.5-flash": 0.5868263473053892,
|
936 |
+
"phi_4_multimodal_instruct": 0.688622754491018,
|
937 |
+
"SALMONN_7B": 0.5598802395209581,
|
938 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
|
939 |
}
|
940 |
},
|
941 |
"mmau_mini_sound": {
|
|
|
943 |
"Qwen-Audio-Chat": 0.43543543543543545,
|
944 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
|
945 |
"Qwen2-Audio-7B-Instruct": 0.4744744744744745,
|
946 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.45045045045045046,
|
947 |
+
"gemini-1.5-flash": 0.3483483483483483,
|
948 |
+
"phi_4_multimodal_instruct": 0.5975975975975976,
|
949 |
+
"SALMONN_7B": 0.4594594594594595,
|
950 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.46546546546546547
|
951 |
},
|
952 |
"llama3_70b_judge": {
|
953 |
"Qwen-Audio-Chat": 0.5945945945945946,
|
954 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
|
955 |
"Qwen2-Audio-7B-Instruct": 0.6306306306306306,
|
956 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5345345345345346,
|
957 |
+
"gemini-1.5-flash": 0.5885885885885885,
|
958 |
+
"phi_4_multimodal_instruct": 0.6456456456456456,
|
959 |
+
"SALMONN_7B": 0.5105105105105106,
|
960 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
|
961 |
}
|
962 |
},
|
963 |
"mmau_mini_speech": {
|
|
|
965 |
"Qwen-Audio-Chat": 0.2882882882882883,
|
966 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
|
967 |
"Qwen2-Audio-7B-Instruct": 0.3993993993993994,
|
968 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5555555555555556,
|
969 |
+
"gemini-1.5-flash": 0.3033033033033033,
|
970 |
+
"phi_4_multimodal_instruct": 0.3993993993993994,
|
971 |
+
"SALMONN_7B": 0.2732732732732733,
|
972 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5645645645645646
|
973 |
},
|
974 |
"llama3_70b_judge": {
|
975 |
"Qwen-Audio-Chat": 0.4174174174174174,
|
976 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
|
977 |
"Qwen2-Audio-7B-Instruct": 0.5345345345345346,
|
978 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.6066066066066066,
|
979 |
+
"gemini-1.5-flash": 0.5705705705705706,
|
980 |
+
"phi_4_multimodal_instruct": 0.44744744744744747,
|
981 |
+
"SALMONN_7B": 0.3813813813813814,
|
982 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
|
983 |
}
|
984 |
},
|
985 |
"imda_part5_30s_sqa_test": {
|
|
|
1106 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 62.599999999999994,
|
1107 |
"Qwen2-Audio-7B-Instruct": 46.2,
|
1108 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 65.4,
|
1109 |
+
"phi_4_multimodal_instruct": 52.599999999999994,
|
1110 |
"WavLLM_fairseq": 49.400000000000006,
|
1111 |
"SALMONN_7B": 24.2,
|
1112 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
|