binwang committed on
Commit
8a00bd5
·
verified ·
1 Parent(s): 719a2ca

Upload organize_model_results.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. organize_model_results.json +46 -9
organize_model_results.json CHANGED
@@ -20,7 +20,11 @@
20
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.0,
21
  "Qwen2-Audio-7B-Instruct": 0.5685405990059489,
22
  "whisper_large_v3": 0.8294532718704128,
23
- "phi_4_multimodal_instruct": 1.3868687388941825
 
 
 
 
24
  }
25
  },
26
  "wavcaps_test": {
@@ -895,13 +899,21 @@
895
  "Qwen-Audio-Chat": 38.5,
896
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
897
  "Qwen2-Audio-7B-Instruct": 44.4,
898
- "phi_4_multimodal_instruct": 54.50000000000001
 
 
 
 
899
  },
900
  "llama3_70b_judge": {
901
  "Qwen-Audio-Chat": 53.6,
902
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
903
  "Qwen2-Audio-7B-Instruct": 58.9,
904
- "phi_4_multimodal_instruct": 59.4
 
 
 
 
905
  }
906
  },
907
  "mmau_mini_music": {
@@ -909,13 +921,21 @@
909
  "Qwen-Audio-Chat": 0.4311377245508982,
910
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
911
  "Qwen2-Audio-7B-Instruct": 0.45808383233532934,
912
- "phi_4_multimodal_instruct": 0.6377245508982036
 
 
 
 
913
  },
914
  "llama3_70b_judge": {
915
  "Qwen-Audio-Chat": 0.5958083832335329,
916
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
917
  "Qwen2-Audio-7B-Instruct": 0.6017964071856288,
918
- "phi_4_multimodal_instruct": 0.688622754491018
 
 
 
 
919
  }
920
  },
921
  "mmau_mini_sound": {
@@ -923,13 +943,21 @@
923
  "Qwen-Audio-Chat": 0.43543543543543545,
924
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
925
  "Qwen2-Audio-7B-Instruct": 0.4744744744744745,
926
- "phi_4_multimodal_instruct": 0.5975975975975976
 
 
 
 
927
  },
928
  "llama3_70b_judge": {
929
  "Qwen-Audio-Chat": 0.5945945945945946,
930
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
931
  "Qwen2-Audio-7B-Instruct": 0.6306306306306306,
932
- "phi_4_multimodal_instruct": 0.6456456456456456
 
 
 
 
933
  }
934
  },
935
  "mmau_mini_speech": {
@@ -937,13 +965,21 @@
937
  "Qwen-Audio-Chat": 0.2882882882882883,
938
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
939
  "Qwen2-Audio-7B-Instruct": 0.3993993993993994,
940
- "phi_4_multimodal_instruct": 0.3993993993993994
 
 
 
 
941
  },
942
  "llama3_70b_judge": {
943
  "Qwen-Audio-Chat": 0.4174174174174174,
944
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
945
  "Qwen2-Audio-7B-Instruct": 0.5345345345345346,
946
- "phi_4_multimodal_instruct": 0.44744744744744747
 
 
 
 
947
  }
948
  },
949
  "imda_part5_30s_sqa_test": {
@@ -1070,6 +1106,7 @@
1070
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 62.599999999999994,
1071
  "Qwen2-Audio-7B-Instruct": 46.2,
1072
  "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 65.4,
 
1073
  "WavLLM_fairseq": 49.400000000000006,
1074
  "SALMONN_7B": 24.2,
1075
  "cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
 
20
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.0,
21
  "Qwen2-Audio-7B-Instruct": 0.5685405990059489,
22
  "whisper_large_v3": 0.8294532718704128,
23
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.4757667842702995,
24
+ "phi_4_multimodal_instruct": 1.3868687388941825,
25
+ "WavLLM_fairseq": 1.2058793232211378,
26
+ "SALMONN_7B": 0.7757204295537071,
27
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.4715562308464886
28
  }
29
  },
30
  "wavcaps_test": {
 
899
  "Qwen-Audio-Chat": 38.5,
900
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
901
  "Qwen2-Audio-7B-Instruct": 44.4,
902
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 46.7,
903
+ "gemini-1.5-flash": 31.4,
904
+ "phi_4_multimodal_instruct": 54.50000000000001,
905
+ "SALMONN_7B": 40.5,
906
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 51.0
907
  },
908
  "llama3_70b_judge": {
909
  "Qwen-Audio-Chat": 53.6,
910
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
911
  "Qwen2-Audio-7B-Instruct": 58.9,
912
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 52.7,
913
+ "gemini-1.5-flash": 58.199999999999996,
914
+ "phi_4_multimodal_instruct": 59.4,
915
+ "SALMONN_7B": 48.4,
916
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
917
  }
918
  },
919
  "mmau_mini_music": {
 
921
  "Qwen-Audio-Chat": 0.4311377245508982,
922
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
923
  "Qwen2-Audio-7B-Instruct": 0.45808383233532934,
924
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.39520958083832336,
925
+ "gemini-1.5-flash": 0.2904191616766467,
926
+ "phi_4_multimodal_instruct": 0.6377245508982036,
927
+ "SALMONN_7B": 0.4820359281437126,
928
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5
929
  },
930
  "llama3_70b_judge": {
931
  "Qwen-Audio-Chat": 0.5958083832335329,
932
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
933
  "Qwen2-Audio-7B-Instruct": 0.6017964071856288,
934
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.44011976047904194,
935
+ "gemini-1.5-flash": 0.5868263473053892,
936
+ "phi_4_multimodal_instruct": 0.688622754491018,
937
+ "SALMONN_7B": 0.5598802395209581,
938
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
939
  }
940
  },
941
  "mmau_mini_sound": {
 
943
  "Qwen-Audio-Chat": 0.43543543543543545,
944
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
945
  "Qwen2-Audio-7B-Instruct": 0.4744744744744745,
946
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.45045045045045046,
947
+ "gemini-1.5-flash": 0.3483483483483483,
948
+ "phi_4_multimodal_instruct": 0.5975975975975976,
949
+ "SALMONN_7B": 0.4594594594594595,
950
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.46546546546546547
951
  },
952
  "llama3_70b_judge": {
953
  "Qwen-Audio-Chat": 0.5945945945945946,
954
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
955
  "Qwen2-Audio-7B-Instruct": 0.6306306306306306,
956
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5345345345345346,
957
+ "gemini-1.5-flash": 0.5885885885885885,
958
+ "phi_4_multimodal_instruct": 0.6456456456456456,
959
+ "SALMONN_7B": 0.5105105105105106,
960
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
961
  }
962
  },
963
  "mmau_mini_speech": {
 
965
  "Qwen-Audio-Chat": 0.2882882882882883,
966
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
967
  "Qwen2-Audio-7B-Instruct": 0.3993993993993994,
968
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5555555555555556,
969
+ "gemini-1.5-flash": 0.3033033033033033,
970
+ "phi_4_multimodal_instruct": 0.3993993993993994,
971
+ "SALMONN_7B": 0.2732732732732733,
972
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5645645645645646
973
  },
974
  "llama3_70b_judge": {
975
  "Qwen-Audio-Chat": 0.4174174174174174,
976
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
977
  "Qwen2-Audio-7B-Instruct": 0.5345345345345346,
978
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.6066066066066066,
979
+ "gemini-1.5-flash": 0.5705705705705706,
980
+ "phi_4_multimodal_instruct": 0.44744744744744747,
981
+ "SALMONN_7B": 0.3813813813813814,
982
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
983
  }
984
  },
985
  "imda_part5_30s_sqa_test": {
 
1106
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 62.599999999999994,
1107
  "Qwen2-Audio-7B-Instruct": 46.2,
1108
  "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 65.4,
1109
+ "phi_4_multimodal_instruct": 52.599999999999994,
1110
  "WavLLM_fairseq": 49.400000000000006,
1111
  "SALMONN_7B": 24.2,
1112
  "cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996