Spaces:
Runtime error
Runtime error
Commit
·
0432ec8
1
Parent(s):
d499ebb
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# --- Environment setup -------------------------------------------------------
# The setup steps were originally written as IPython `!` shell escapes. Those
# are only valid inside a notebook; in a plain Python script they are a
# SyntaxError, so the dependencies are declared here as documentation instead.
# On Hugging Face Spaces, list them in the repository's dependency files.
#
# requirements.txt (pip):
#   git+https://github.com/PrithivirajDamodaran/Gramformer.git
#   gradio
#   wget
#   text-unidecode
#   matplotlib>=3.3.2      # quote this in a shell: an unquoted `>` redirects
#   nemo_toolkit[all] @ git+https://github.com/NVIDIA/NeMo.git@r1.13.0
#   the spaCy model `en_core_web_md`
#     (originally `python -m spacy download en_core_web_md` followed by
#     `python -m spacy link en_core_web_md en`; the `link` command exists only
#     in spaCy v2 -- TODO confirm which spaCy version Gramformer needs here)
#
# packages.txt (apt):
#   sox
#   libsndfile1
#   ffmpeg

import os
import urllib.request

## NeMo release branch; must match the nemo_toolkit revision installed above.
BRANCH = 'r1.13.0'

## Grab the config we'll use in this example
_CONFIG_DIR = "configs"
_CONFIG_PATH = os.path.join(_CONFIG_DIR, "config.yaml")
_CONFIG_URL = (
    "https://raw.githubusercontent.com/NVIDIA/NeMo/"
    f"{BRANCH}/examples/asr/conf/config.yaml"
)

# `exist_ok` keeps restarts from failing the way a bare `mkdir configs` would.
os.makedirs(_CONFIG_DIR, exist_ok=True)
if not os.path.exists(_CONFIG_PATH):
    try:
        with urllib.request.urlopen(_CONFIG_URL, timeout=30) as _response:
            _payload = _response.read()
    except OSError as _err:
        # Best effort: a failed download must not stop the app from starting.
        print(f"warning: could not download {_CONFIG_URL}: {_err}")
    else:
        with open(_CONFIG_PATH, "wb") as _fh:
            _fh.write(_payload)
|
19 |
+
|
20 |
+
import gradio as gr
|
21 |
+
import time
|
22 |
+
from nemo.collections.asr.models import ASRModel
|
23 |
+
import torch
|
24 |
+
# Bind `device` on every host (CPU-only included) so later code can rely on
# it, and hand it to NeMo so the model placement is explicit.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load the pretrained `stt_en_citrinet_1024` checkpoint directly onto `device`.
asr_model = ASRModel.from_pretrained(
    model_name='stt_en_citrinet_1024',
    map_location=device,
)
|
27 |
+
|
28 |
+
from gramformer import Gramformer
|
29 |
+
import torch
|
30 |
+
|
31 |
+
def set_seed(seed: int) -> None:
    """Seed PyTorch's random number generators.

    Args:
        seed: Value passed to ``torch.manual_seed`` and, when CUDA is
            available, to ``torch.cuda.manual_seed_all``.
    """
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
|
35 |
+
|
36 |
+
# Seed the RNGs before the grammar model is constructed.
set_seed(1212)

# Gramformer model selector: 1 = corrector, 2 = detector. Runs on CPU.
gf = Gramformer(models=1, use_gpu=False)
|
39 |
+
|
40 |
+
def transcribe(audio):
    """Transcribe an audio file with NeMo, then grammar-correct the transcript.

    Args:
        audio: Path to the audio file to transcribe, or a falsy value
            (``None`` / ``""``) when nothing was submitted.

    Returns:
        A ``(text, correct)`` tuple holding the raw transcript and its
        corrected form. Both are empty strings when there is no audio or the
        transcript is empty; ``correct`` falls back to ``text`` when the
        corrector yields no candidate.
    """
    # Submitting the form without a recording passes no file path; return
    # blanks instead of letting the ASR call fail.
    if not audio:
        return "", ""

    text = asr_model.transcribe(paths2audio_files=[audio])[0]
    if not text:
        # Nothing was recognised, so there is nothing to correct.
        return "", ""

    # `gf.correct` may produce no candidates (or None); indexing the result
    # blindly would raise, so fall back to the uncorrected transcript.
    candidates = gf.correct(text, max_candidates=1)
    correct = next(iter(candidates)) if candidates else text
    return text, correct
|
45 |
+
|
46 |
+
# we need input, output and interface components for gradio
|
47 |
+
# Gradio UI: one audio input delivered to `transcribe` as a file path, and two
# text boxes for the values it returns.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.components.Audio(type="filepath")],
    outputs=["textbox", "textbox"],
)
demo.launch()
|