Spaces:
Runtime error
Runtime error
import os | |
import gradio as gr | |
import numpy as np | |
import librosa | |
import torch | |
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration | |
from dotenv import load_dotenv | |
import openai | |
from run_command_responses import ResponseManager as rs | |
# Dispatch table: command label -> ResponseManager callable invoked (with no
# arguments) once an utterance has been classified as that label.
# NOTE: the identifier is misspelled ("resoponses") but is kept as-is because
# other code in this module looks it up under this exact name.
resoponses = dict(
    heated_seats_on=rs.activate_heated_seats,
    heated_seats_off=rs.deactivate_heated_seats,
    cooled_seats_on=rs.activate_cooled_seats,
    cooled_seats_off=rs.deactivate_cooled_seats,
    massage_seats_on=rs.activate_massage_seats,
    massage_seats_off=rs.deactivate_massage_seats,
)
# Integer class id (parsed from the completion model's one-token reply)
# -> command label. Ids start at 1 and follow the order listed here.
id2label = dict(
    enumerate(
        (
            "massage_seats_on",
            "massage_seats_off",
            "heated_seats_on",
            "heated_seats_off",
            "cooled_seats_on",
            "cooled_seats_off",
        ),
        start=1,
    )
)
# Pull variables from a local .env file into the process environment so the
# os.getenv() calls below can see them.
load_dotenv()

# Append the bundled ffprobe directory to PATH.
# - os.pathsep keeps the new entry separate from the last existing entry
#   (plain concatenation fused the two into one invalid path).
# - os.path.join avoids backslash escapes: in a normal string literal "\f" is
#   a form feed, so the previous literal never spelled "...\ffprobe" at all.
os.environ["PATH"] += os.pathsep + os.path.join(
    ".", "env", "Lib", "site-packages", "ffprobe"
)

# Credentials and the completion model name come from the environment; either
# is None when the corresponding variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL = os.getenv("MODEL")  # passed as `model=` to openai.Completion.create
openai.api_key = OPENAI_API_KEY
# The speech-to-text model and its processor are loaded once, at import time,
# from the same pretrained checkpoint.
_S2T_CHECKPOINT = "facebook/s2t-small-librispeech-asr"
processor = Speech2TextProcessor.from_pretrained(_S2T_CHECKPOINT)
model = Speech2TextForConditionalGeneration.from_pretrained(_S2T_CHECKPOINT)
def get_command(command, model, id2label):
    """Classify an utterance into a command label via an OpenAI completion.

    The prompt sent is ``"{command}->"``; a single token is requested with
    temperature 0 and its text is interpreted as an integer class id.

    Args:
        command: Text of the utterance to classify.
        model: Name of the OpenAI completion model to query.
        id2label: Mapping from integer class id to label string.

    Returns:
        The label mapped to the predicted id, or ``"unknown"`` when the reply
        is not an integer or the id has no entry in ``id2label``.
    """
    completion = openai.Completion.create(
        model=model, prompt=f"{command}->", max_tokens=1, temperature=0
    )
    reply = completion["choices"][0]["text"].strip()
    try:
        label_id = int(reply)
    except ValueError:
        # The model answered with something other than a number (or nothing
        # at all); treat it like any other unrecognised command.
        return "unknown"
    return id2label.get(label_id, "unknown")
def transcribe(audio):
    """Transcribe a recording, classify it, and run the matching command.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.

    Returns:
        The command label produced by ``get_command``, or ``"unknown"`` when
        there is no audio or the transcription is empty. When the label has
        an entry in ``resoponses`` that handler is called before returning.
    """
    if audio is None:
        # Nothing was recorded; there is nothing to load or classify.
        return "unknown"

    # Load the file resampled to 16000 Hz.
    waveform, rate = librosa.load(audio, sr=16000)
    inputs = processor(waveform, sampling_rate=rate, return_tensors="pt")
    generated_ids = model.generate(
        inputs["input_features"], attention_mask=inputs["attention_mask"]
    )

    # batch_decode returns one string per batch item; a single clip was fed
    # in, so take the first. Passing the list itself would put its repr
    # ("['...']") into the classification prompt.
    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
    text = transcription[0] if transcription else ""
    if not text.strip():
        # Avoid classifying (and possibly acting on) an empty utterance.
        return "unknown"

    result = get_command(text, MODEL, id2label)

    # "unknown" (or any unmapped label) has no handler; calling the None
    # returned by .get() would raise TypeError.
    handler = resoponses.get(result)
    if handler is not None:
        handler()
    return result
if __name__ == "__main__":
    # Microphone recordings are handed to `transcribe` as a file path and the
    # returned label is shown as plain text.
    # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4 expects
    # `sources=[...]` instead -- confirm which Gradio version is pinned.
    demo = gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs="text",
    )
    demo.launch()