Spaces:

GetmanY1
/

sami_asr

Running

File size: 1,003 Bytes

7e6e084
 
cf20a06
0b960f8
7e6e084
 
cf20a06
 
 
 
d4dffb9
 
 
cf20a06
 
d08907e
7e6e084
 
 
 
 
 
 
 
 
 
ca8157c
 
 
 
7e6e084

import gradio as gr
from transformers import pipeline
from librosa import resample
import numpy as np

def transcribe(input_audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        input_audio: A ``(sample_rate, samples)`` tuple, or ``None`` when no
            clip was supplied. ``samples`` is a NumPy array; an array with
            more than one dimension is averaged over axis 1 to obtain mono.

    Returns:
        The ``'text'`` field of the pipeline output, or an empty string when
        there is no audio to transcribe.
    """
    # Guard against a missing clip (e.g. the form was submitted with no
    # audio); unpacking None below would raise a TypeError.
    if input_audio is None:
        return ""
    sr, speech = input_audio
    # A zero-length clip has nothing to resample or decode.
    if speech.size == 0:
        return ""
    # Record the integer full-scale value *before* down-mixing: mean() turns
    # integer samples into floats, after which the PCM width is unknown.
    int_peak = None
    if np.issubdtype(speech.dtype, np.integer):
        int_peak = np.iinfo(speech.dtype).max
    # Convert to mono if stereo
    if speech.ndim > 1:
        speech = speech.mean(axis=1)
    # Convert to float32 if needed
    if speech.dtype != np.float32:
        speech = speech.astype(np.float32)
    # Integer PCM is rescaled to roughly [-1, 1]; a bare cast would keep the
    # raw sample magnitudes (e.g. +/-32767 for int16).
    if int_peak is not None:
        speech = speech / np.float32(int_peak)
    # Resample if sampling rate is not 16kHz
    if sr != 16000:
        speech = resample(speech, orig_sr=sr, target_sr=16000)
    # Long recordings are decoded in 30 s chunks with 5 s of stride.
    result = pipe(speech, chunk_length_s=30, stride_length_s=5)
    return result['text']

# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model; inference is pinned to the CPU.
pipe = pipeline(
    task="automatic-speech-recognition",
    device="cpu",
    model="GetmanY1/wav2vec2-large-sami-cont-pt-22k-finetuned",
)

# Web UI: a single audio input (file upload or microphone) wired to
# transcribe(), with the result rendered as plain text.
gradio_app = gr.Interface(
    title="Sámi Automatic Speech Recognition",
    fn=transcribe,
    inputs=gr.Audio(sources=["upload", "microphone"]),
    outputs="text",
)

# Launch the Gradio app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    gradio_app.launch()