Voice_Commands / app.py
zinoubm's picture
initial commit
0013d95
raw
history blame
1.07 kB
import os
import gradio as gr
import numpy as np
import librosa
import torch
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
from dotenv import load_dotenv
# Load settings from a local .env file (if any) into the process environment.
load_dotenv()

# Extend PATH with the ffprobe directory inside the local "env" folder
# (presumably a project virtualenv -- confirm the location for your setup).
# The path is built with os.path.join rather than a backslash literal: in a
# normal string "\f" is a form-feed escape, which silently corrupted the
# entry.  It is joined with os.pathsep so it becomes its own PATH entry
# instead of being glued onto the last existing one, and a missing PATH
# variable no longer raises KeyError.
_FFPROBE_DIR = os.path.join(os.curdir, "env", "Lib", "site-packages", "ffprobe")
os.environ["PATH"] = os.pathsep.join(
    entry for entry in (os.environ.get("PATH", ""), _FFPROBE_DIR) if entry
)

# Optional settings read from the environment; each is None when unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL = os.getenv("MODEL")
# One checkpoint name feeds both loaders so the model and its processor
# always come from the same pretrained weights.
_CHECKPOINT = "facebook/s2t-small-librispeech-asr"

model = Speech2TextForConditionalGeneration.from_pretrained(_CHECKPOINT)
processor = Speech2TextProcessor.from_pretrained(_CHECKPOINT)
def transcribe(audio):
    """Transcribe an audio file to text with the module-level Speech2Text model.

    Args:
        audio: Path to the audio file to transcribe, or ``None`` / an empty
            string when no recording was supplied.

    Returns:
        The decoded transcription as a single string, or an empty string
        when no audio was supplied.
    """
    # Submitting without a recording yields no path; bail out instead of
    # letting librosa.load fail on it.
    if not audio:
        return ""

    # Load and resample to 16 kHz; the same rate is then handed to the
    # processor so feature extraction matches the loaded waveform.
    waveform, rate = librosa.load(audio, sr=16000)
    features = processor(waveform, sampling_rate=rate, return_tensors="pt")
    generated_ids = model.generate(
        features["input_features"], attention_mask=features["attention_mask"]
    )

    # batch_decode returns a list with one string per batch item; join it so
    # the text output receives a plain string rather than a list repr.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return " ".join(decoded)
# Build the web UI: a microphone recording (delivered to the callback as a
# file path) goes in, the transcription comes out as text.
microphone_input = gr.Audio(source="microphone", type="filepath")
demo = gr.Interface(fn=transcribe, inputs=microphone_input, outputs="text")
demo.launch()