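# NOTE: the following lines are file-viewer page residue (not program code),
# kept verbatim as comments so that the module parses as Python.
# jiaofengxu
# Update app.py
# 5f35e8d
# raw
# history blame
# 1.16 kB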
#from transformers import pipeline
import gradio as gr
#import nemo.collections.asr as nemo_asr
#import gradio
#model = pipeline("automatic-speech-recognition")
#model = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
#model = pipeline("automatic-speech-recognition", model="nvidia/parakeet-ctc-0.6b")
# Disabled alternative implementation, parked inside a bare triple-quoted
# string. Python evaluates the string and discards it, so nothing in it runs.
# The snippet loads a NeMo CTC model ("nvidia/parakeet-ctc-1.1b"), defines
# transcribe_audio(mic, file), which prefers the microphone recording over the
# uploaded file, and wires it to a gr.Interface with two filepath audio inputs
# and a text output.
# NOTE(review): as stored here the snippet has no indentation, and the
# nemo_asr import near the top of the file is commented out, so both would
# need restoring before this code could be re-enabled.
'''
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name="nvidia/parakeet-ctc-1.1b")
def transcribe_audio(mic=None, file=None):
if mic is not None:
audio = mic
elif file is not None:
audio = file
else:
return "You must either provide a mic recording or a file"
#transcription = model(audio)["text"]
transcription = asr_model(audio)
return transcription
gr.Interface(
fn=transcribe_audio,
inputs=[
gr.Audio(sources="microphone", type="filepath"),
gr.Audio(sources="upload", type="filepath"),
],
outputs="text",
).launch(share=True)
'''
# Earlier model choices, kept for reference (disabled):
#gr.load("models/nvidia/parakeet-ctc-1.1b").launch()
#gr.load("models/openai/whisper-medium.en").launch(share=True)

# Live entry point: build the interface from the referenced model, then start
# serving it with share=True.
demo = gr.load("models/nvidia/stt_en_fastconformer_ctc_large")
demo.launch(share=True)