kotoba-tech
/

kotoba-whisper-v2.1

@@ -8,19 +8,8 @@ tags:
 metrics:
 - wer
 - cer
-widget:
-- example_title: CommonVoice 8.0 (Test Split)
-  src: >-
-    https://huggingface.co/datasets/japanese-asr/ja_asr.common_voice_8_0/resolve/main/sample.flac
-- example_title: JSUT Basic 5000
-  src: >-
-    https://huggingface.co/datasets/japanese-asr/ja_asr.jsut_basic5000/resolve/main/sample.flac
-- example_title: ReazonSpeech (Test Split)
-  src: >-
-    https://huggingface.co/datasets/japanese-asr/ja_asr.reazonspeech_test/resolve/main/sample.flac
-pipeline_tag: automatic-speech-recognition
 model-index:
-- name: kotoba-tech/kotoba-whisper-v2.1
   results:
   - task:
       type: automatic-speech-recognition
@@ -28,36 +17,40 @@ model-index:
       name: CommonVoice_8.0 (Japanese)
       type: japanese-asr/ja_asr.common_voice_8_0
     metrics:
-    - type: WER
-      value: 59.27
-      name: WER
-    - type: CER
-      value: 9.44
-      name: CER
   - task:
       type: automatic-speech-recognition
     dataset:
       name: ReazonSpeech (Test)
       type: japanese-asr/ja_asr.reazonspeech_test
     metrics:
-    - type: WER
-      value: 56.62
-      name: WER
-    - type: CER
-      value: 12.6
-      name: CER
   - task:
       type: automatic-speech-recognition
     dataset:
       name: JSUT Basic5000
       type: japanese-asr/ja_asr.jsut_basic5000
     metrics:
-    - type: WER
-      value: 64.36
-      name: WER
-    - type: CER
-      value: 8.48
-      name: CER
 ---
 # Kotoba-Whisper-v2.1
@@ -74,15 +67,15 @@ along with the.
 | model                                                    |   CommonVoice 8.0 (Japanese) |   JSUT Basic 5000 |  ReazonSpeech Test |
 |:---------------------------------------------------------|---------------------------------------:|-------------------------------------:|----------------------------------------:|
-| kotoba-tech/kotoba-whisper-v2.0                          |                                   15.6 |                                 15.2 |                                    17.8 |
-| kotoba-tech/kotoba-whisper-v2.1 (punctuator + stable-ts) |                                   13.7 |                                 ***11.2*** |                                    ***17.4*** |
-| kotoba-tech/kotoba-whisper-v2.1 (punctuator)             |                                   13.9 |                                 11.4 |                                    18   |
-| kotoba-tech/kotoba-whisper-v2.1 (stable-ts)              |                                   15.7 |                                 15   |                                    17.7 |
-| kotoba-tech/kotoba-whisper-v1.0                          |                                   15.6 |                                 15.2 |                                    17.8 |
-| kotoba-tech/kotoba-whisper-v1.1 (punctuator + stable-ts) |                                   13.7 |                                 ***11.2*** |                                    ***17.4*** |
-| kotoba-tech/kotoba-whisper-v1.1 (punctuator)             |                                   13.9 |                                 11.4 |                                    18   |
-| kotoba-tech/kotoba-whisper-v1.1 (stable-ts)              |                                   15.7 |                                 15   |                                    17.7 |
-| openai/whisper-large-v3                                  |                                   ***12.9*** |                                 13.4 |                                    20.6 |
 Regarding the normalized CER: because the updates introduced in v2.1 are removed by the normalization, kotoba-tech/kotoba-whisper-v2.1 has the same CER values as [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0).

 metrics:
 - wer
 - cer
 model-index:
+- name: kotoba-tech/kotoba-whisper-v2.0
   results:
   - task:
       type: automatic-speech-recognition
       name: CommonVoice_8.0 (Japanese)
       type: japanese-asr/ja_asr.common_voice_8_0
     metrics:
+    - name: WER
+      type: WER
+      value: 58.9
+    - name: CER
+      type: CER
+      value: 9.2
   - task:
       type: automatic-speech-recognition
     dataset:
       name: ReazonSpeech (Test)
       type: japanese-asr/ja_asr.reazonspeech_test
     metrics:
+    - name: WER
+      type: WER
+      value: 55.6
+    - name: CER
+      type: CER
+      value: 11.63
   - task:
       type: automatic-speech-recognition
     dataset:
       name: JSUT Basic5000
       type: japanese-asr/ja_asr.jsut_basic5000
     metrics:
+    - name: WER
+      type: WER
+      value: 63.8
+    - name: CER
+      type: CER
+      value: 8.4
+datasets:
+- japanese-asr/whisper_transcriptions.reazonspeech.all
+- japanese-asr/whisper_transcriptions.reazonspeech.all.wer_10.0
+- japanese-asr/whisper_transcriptions.reazonspeech.all.wer_10.0.vectorized
 ---
 # Kotoba-Whisper-v2.1
 | model                                                    |   CommonVoice 8.0 (Japanese) |   JSUT Basic 5000 |  ReazonSpeech Test |
 |:---------------------------------------------------------|---------------------------------------:|-------------------------------------:|----------------------------------------:|
+| [kotoba-tech/kotoba-whisper-v2.1](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.1) (punctuator + stable-ts)    |                                   13.7 |                                 11.4 |                                    17   |
+| [kotoba-tech/kotoba-whisper-v2.1](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.1) (punctuator)                |                                   13.8 |                                 11.6 |                                    17.3 |
+| [kotoba-tech/kotoba-whisper-v2.1](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.1) (stable-ts)                 |                                   15.5 |                                 15.4 |                                    17   |
+| [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0)                             |                                   15.4 |                                 15.4 |                                    17.4 |
+| [kotoba-tech/kotoba-whisper-v1.1](https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1) (punctuator + stable-ts)    |                                   13.7 |                                 11.2 |                                    17.4 |
+| [kotoba-tech/kotoba-whisper-v1.1](https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1) (punctuator)                |                                   13.9 |                                 11.4 |                                    18   |
+| [kotoba-tech/kotoba-whisper-v1.1](https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1) (stable-ts)                 |                                   15.7 |                                 15   |                                    17.7 |
+| [kotoba-tech/kotoba-whisper-v1.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0)                             |                                   15.6 |                                 15.2 |                                    17.8 |
+| [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3)                                     |                                   12.9 |                                 13.4 |                                    20.6 |
 Regarding the normalized CER: because the updates introduced in v2.1 are removed by the normalization, kotoba-tech/kotoba-whisper-v2.1 has the same CER values as [kotoba-tech/kotoba-whisper-v2.0](https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0).