Spaces:

spookyspaghetti
/

Speech-Analyser

Runtime error

App Files Files Community

spookyspaghetti committed on Dec 29, 2022

Commit

0432ec8

1 Parent(s): d499ebb

Create app.py

Browse files

Files changed (1) hide show

app.py +55 -0

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+!pip install -U git+https://github.com/PrithivirajDamodaran/Gramformer.git
+!pip install gradio -q
+## Install dependencies
+!pip install wget
+!apt-get install sox libsndfile1 ffmpeg
+!pip install text-unidecode
+!pip install matplotlib>=3.3.2
+## Install NeMo
+BRANCH = 'r1.13.0'
+!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]
+## Grab the config we'll use in this example
+!mkdir configs
+!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml
+!python -m spacy download en_core_web_md
+!python -m spacy link en_core_web_md en
+import gradio as gr
+import time
+from nemo.collections.asr.models import ASRModel
+import torch
+if torch.cuda.is_available():
+    device = torch.device(f'cuda:0')
+asr_model = ASRModel.from_pretrained(model_name='stt_en_citrinet_1024')
+from gramformer import Gramformer
+import torch
+def set_seed(seed):
+  torch.manual_seed(seed)
+  if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed)
+set_seed(1212)
+gf = Gramformer(models = 1, use_gpu=False) # 1=corrector, 2=detector
+def transcribe(audio):
+  """Speech to text using Nvidia Nemo"""
+  text = asr_model.transcribe(paths2audio_files=[audio])[0]
+  correct = list(gf.correct(text, max_candidates = 1))[0]
+  return text, correct
+# we need input, output and interface components for gradio
+gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.components.Audio(type="filepath"),
+    ],
+    outputs=[
+        "textbox",
+        "textbox"
+    ]).launch()