istupakov committed 3f878f1 (verified) · parent 93a98ea

Update app.py

Files changed (1): app.py (+34, -36)
app.py CHANGED
@@ -1,4 +1,5 @@
 from importlib.metadata import version
+from timeit import default_timer as timer
 
 import gradio as gr
 import numpy as np
@@ -20,46 +21,43 @@ models = {
 }
 
 
-def recoginize(audio: tuple[int, np.ndarray]):
+def recognize(audio: tuple[int, np.ndarray]):
     sample_rate, waveform = audio
     try:
         waveform = waveform.astype(np.float32) / 2 ** (8 * waveform.itemsize - 1)
-        return [[name, model.recognize(waveform, sample_rate=sample_rate, language="ru")] for name, model in models.items()]
+        results = []
+        for name, model in models.items():
+            start = timer()
+            result = model.recognize(waveform, sample_rate=sample_rate, language="ru")
+            time = timer() - start
+            results.append([name, result, f"{time:.3f} s."])
     except Exception as e:
         raise gr.Error(f"{e} Audio: sample_rate: {sample_rate}, waveform.shape: {waveform.shape}.") from e
-
-
-demo = gr.Interface(
-    fn=recoginize,
-    title="ASR demo using onnx-asr (Russian models)",
-    description="""
-# ONNX ASR
-
-[![PyPI - Version](https://img.shields.io/pypi/v/onnx-asr.svg)](https://pypi.org/project/onnx-asr)
-[![PyPI - Downloads](https://img.shields.io/pypi/dm/onnx-asr)](https://pypi.org/project/onnx-asr)
-[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/onnx-asr.svg)](https://pypi.org/project/onnx-asr)
-[![PyPI - Types](https://img.shields.io/pypi/types/onnx-asr)](https://pypi.org/project/onnx-asr)
-[![GitHub License](https://img.shields.io/github/license/istupakov/onnx-asr)](https://github.com/istupakov/onnx-asr/blob/main/LICENSE)
-[![CI](https://github.com/istupakov/onnx-asr/actions/workflows/python-package.yml/badge.svg)](https://github.com/istupakov/onnx-asr/actions/workflows/python-package.yml)
-
-**[onnx-asr](https://github.com/istupakov/onnx-asr)** is a Python package for Automatic Speech Recognition using ONNX models. The package is written in pure Python with minimal dependencies (*PyTorch is not required*):
-
-[![numpy](https://img.shields.io/badge/numpy-required-blue?logo=numpy)](https://pypi.org/project/numpy/)
-[![onnxruntime](https://img.shields.io/badge/onnxruntime-required-blue?logo=onnx)](https://pypi.org/project/onnxruntime/)
-[![huggingface-hub](https://img.shields.io/badge/huggingface--hub-optional-blue?logo=huggingface)](https://pypi.org/project/huggingface-hub/)
-
-## Models used in demo:
-* `gigaam-v2-ctc` - Sber GigaAM v2 CTC ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
-* `gigaam-v2-rnnt` - Sber GigaAM v2 RNN-T ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
-* `nemo-fastconformer-ru-ctc` - Nvidia FastConformer-Hybrid Large (ru) with CTC decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
-* `nemo-fastconformer-ru-rnnt` - Nvidia FastConformer-Hybrid Large (ru) with RNN-T decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
-* `alphacep/vosk-model-ru` - Alpha Cephei Vosk 0.54-ru ([origin](https://huggingface.co/alphacep/vosk-model-ru))
-* `alphacep/vosk-model-small-ru` - Alpha Cephei Vosk 0.52-small-ru ([origin](https://huggingface.co/alphacep/vosk-model-small-ru))
-* `whisper-base` - OpenAI Whisper Base exported with onnxruntime ([origin](https://huggingface.co/openai/whisper-base), [onnx](https://huggingface.co/istupakov/whisper-base-onnx))
-""",
-    inputs=[gr.Audio(min_length=1, max_length=20)],
-    outputs=[gr.Dataframe(headers=["Model", "result"], wrap=True, show_fullscreen_button=True)],
-    flagging_mode="never",
-)
+    else:
+        return results
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    # ASR demo using onnx-asr (Russian models)
+    **[onnx-asr](https://github.com/istupakov/onnx-asr)** is a Python package for Automatic Speech Recognition using ONNX models.
+    The package is written in pure Python with minimal dependencies (no `pytorch` or `transformers`).
+    """)
+    input = gr.Audio(min_length=1, max_length=20)
+    with gr.Row():
+        gr.ClearButton(input)
+        btn = gr.Button("Recognize", variant="primary")
+    output = gr.Dataframe(headers=["model", "result", "time"], wrap=True)
+    btn.click(fn=recognize, inputs=input, outputs=output)
+    with gr.Accordion("ASR models used in this demo", open=False):
+        gr.Markdown("""
+        * `gigaam-v2-ctc` - Sber GigaAM v2 CTC ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
+        * `gigaam-v2-rnnt` - Sber GigaAM v2 RNN-T ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
+        * `nemo-fastconformer-ru-ctc` - Nvidia FastConformer-Hybrid Large (ru) with CTC decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
+        * `nemo-fastconformer-ru-rnnt` - Nvidia FastConformer-Hybrid Large (ru) with RNN-T decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
+        * `alphacep/vosk-model-ru` - Alpha Cephei Vosk 0.54-ru ([origin](https://huggingface.co/alphacep/vosk-model-ru))
+        * `alphacep/vosk-model-small-ru` - Alpha Cephei Vosk 0.52-small-ru ([origin](https://huggingface.co/alphacep/vosk-model-small-ru))
+        * `whisper-base` - OpenAI Whisper Base exported with onnxruntime ([origin](https://huggingface.co/openai/whisper-base), [onnx](https://huggingface.co/istupakov/whisper-base-onnx))
+        """)
 
 demo.launch()
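
For context, the `models` mapping that the new `recognize()` loops over lives in the unchanged part of app.py (see the `models = {` hunk header above) and is not touched by this commit. A minimal sketch of how such a dict is typically built with onnx-asr is shown below; it assumes the package's `onnx_asr.load_model(name)` loader and reuses the model names from the demo description, so treat it as illustrative rather than the exact code in the Space.

```python
# Hypothetical sketch of the unchanged setup that the diff above assumes (not part of this commit).
# Assumption: onnx-asr exposes onnx_asr.load_model(name), returning a model whose
# .recognize(waveform, sample_rate=..., language=...) matches the call inside recognize().
import onnx_asr

model_names = [
    "gigaam-v2-ctc",
    "gigaam-v2-rnnt",
    "nemo-fastconformer-ru-ctc",
    "nemo-fastconformer-ru-rnnt",
    "alphacep/vosk-model-ru",
    "alphacep/vosk-model-small-ru",
    "whisper-base",
]

# Load every model once at startup; recognize() then iterates over this dict,
# timing each model's transcription of the uploaded audio.
models = {name: onnx_asr.load_model(name) for name in model_names}
```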