# NOTE: page-capture residue, not part of the program -- "Spaces: Runtime error / Runtime error".
import logging
import string

import gradio as gr
import openai

from constants import *
# Configure the OpenAI client. Both constants are presumably supplied by the
# wildcard import from ``constants`` -- they are not defined in this file.
openai.api_key = OPENAI_API_KEY
openai.organization = OPENAI_ORGANIZATION
# Heading handed to ``gr.Interface`` as its ``title``.
title = "Car Seats Voice Commands"
# Markdown blurb handed to ``gr.Interface`` as its ``description``.
# NOTE(review): the text advertises a **Text** input and a robot-voice output,
# but the interface built at the bottom of this file only wires a microphone
# input and text/highlight outputs -- confirm which one is intended.
description = """
This is a demo for controlling car seats with Voice Commands, On the left there's the inputs section
and on the right you'll find your outputs. For the inputs you have two choices **Voice** and **Text**,
Use **Voice** If you want a closer experience to the final product, Or use **Text** if you just want to test the command model.
for the outputs you have the **transcription**(Please check that it's accurate), **command**(to know which
command the system detected) and you have the robot voice (again use this if you want a more real experience).
**Features** : You can either activate or deactivate the following features
- Heated Seats
- Cooled Seats
- Massage Seats
Examples:
- **Direct Commands** : Try to say something like "Activate heated seats" or "Turn Off massage seats"
- **Indirect Commands** : Try "My back is cold" , "No heating is needed anymore" or "I'm stressed today"
"""
# Background text describing the two processing stages of the demo.
# NOTE(review): this mentions a fine-tuned **ada** model, while ``get_command``
# defaults to "text-davinci-003" -- confirm which model is actually used.
article = """
This demo processes commands in two steps, the first step is the transcription phase and the second is the
Command Classification phase. For Transcription I used The OpenAi whisper model, and for the classification
I Fine-Tuned the OpenAi **ada** model on Car Seats Command.
"""
# Translation table that deletes every ASCII punctuation character; built once
# at import time instead of on every call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def remove_punctuation(input_string: str) -> str:
    """Return *input_string* with all ASCII punctuation characters removed.

    Only the characters listed in ``string.punctuation`` are dropped;
    whitespace, letters, digits and non-ASCII symbols are left untouched.
    """
    return input_string.translate(_PUNCTUATION_TABLE)
# Numeric command code -> seat command label. ``get_command`` looks the
# model's numeric answer up in this table.
id2label = {
    1: "massage_seats_on",
    2: "massage_seats_off",
    3: "heated_seats_on",
    4: "heated_seats_off",
    5: "cooled_seats_on",
    6: "cooled_seats_off",
}
def get_command(command, id2label, model="text-davinci-003"):
    """Classify a user request into one of the seat commands.

    The request is sent to the OpenAI completion endpoint together with the
    numbered list of commands from *id2label*; the model is asked to answer
    with just the number, which is then mapped back to its label.

    Args:
        command: The user's request as plain text.
        id2label: Mapping from integer command code to command label. It is
            used both to render the "Mapping" section of the prompt and to
            decode the model's answer, so the two can never disagree.
        model: Name of the OpenAI completion model to query.

    Returns:
        The label whose code the model answered with, or ``"unknown"`` when
        the command is empty, the answer is not a number, or the number is
        not a key of *id2label*.
    """
    # Nothing to classify (e.g. an empty transcription): skip the API call.
    if not command or not command.strip():
        return "unknown"

    mapping = "\n".join(
        f'{code}: "{label}"' for code, label in id2label.items()
    )
    prompt = (
        "\n"
        "We want to control the seats of a car which has features to cool, "
        "heat, or massage a seat. "
        f'The user said "{command}", Which feature we should use to ensure '
        "user comfort? Give just the number of the feature without any "
        "punctuation.\n"
        "Mapping:\n"
        f"{mapping}\n"
        "Command_Code:\n"
    )

    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=2, temperature=0
    )
    raw_answer = completion["choices"][0]["text"]
    logging.info("command model answered %r", raw_answer)

    # The model may still add punctuation ("3.") or answer with text; only a
    # clean integer is accepted, anything else is reported as "unknown"
    # instead of raising ValueError.
    try:
        command_id = int(remove_punctuation(raw_answer))
    except ValueError:
        return "unknown"
    return id2label.get(command_id, "unknown")
def command_tokens(command, model="text-davinci-003"):
    """Label every word of *command* as ``unit``, ``value`` or ``none``.

    The whitespace-separated words are sent to the OpenAI completion
    endpoint, which is asked to answer with an array holding one label per
    word. The answer is parsed and paired back up with the words.

    Args:
        command: The text whose words should be labelled.
        model: Name of the OpenAI completion model to query.

    Returns:
        A list of ``(word, label)`` tuples, one per word of *command*. Words
        for which the model returned no label are labelled ``"none"``; an
        empty or whitespace-only command yields an empty list without
        calling the API.
    """
    # split() without an argument ignores repeated/leading/trailing spaces,
    # so no empty "words" are sent to the model.
    words = command.split()
    if not words:
        return []

    prompt = (
        "\n"
        "Give an array of the same length of the input, for every element of "
        "the returned array use one of the labels in the label-list\n"
        "label-list :\n"
        "- unit if belongs to the International System of Units\n"
        "- value\n"
        "- none if none of the above\n"
        f"input : [{','.join(words)}]\n"
        "output :\n"
    )

    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=128, temperature=0
    )
    answer = completion["choices"][0]["text"].strip()

    # Drop the array brackets and any quoting the model added, then trim the
    # blanks left around each comma-separated label.
    unwrapped = answer.translate(str.maketrans("", "", "[]'\""))
    labels = [label.strip() or "none" for label in unwrapped.split(",")]

    # zip() stops at the shorter sequence; pad so a short answer does not
    # silently drop the trailing words.
    labels += ["none"] * (len(words) - len(labels))
    return list(zip(words, labels))
def transcribe(audio):
    """Transcribe a recording and classify the spoken seat command.

    Args:
        audio: Path of the recorded audio file. A falsy value (presumably
            what the UI passes when nothing was recorded -- confirm) is
            treated as "no input".

    Returns:
        A ``(command, tokens)`` tuple: the label returned by ``get_command``
        for the transcription, and the ``(word, label)`` pairs returned by
        ``command_tokens``. ``("unknown", [])`` when no audio was given.
    """
    if not audio:
        return "unknown", []

    # The context manager closes the file even if the API call raises.
    with open(audio, "rb") as audio_file:
        response = openai.Audio.transcribe(
            "whisper-1", audio_file, language="en"
        )
    transcription = response["text"]

    result = get_command(transcription, id2label)
    tokens = command_tokens(transcription)
    logging.info("result %s", result)
    logging.info("tokens %s", tokens)
    return result, tokens
if __name__ == "__main__":
    # Serve the demo: microphone recording in, detected command (text) and
    # per-word labels (highlighted text) out.
    gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=["text", "highlight"],
        title=title,
        description=description,
        # ``article`` is defined at module level but was never handed to the
        # interface, so its text could not appear anywhere.
        article=article,
    ).launch()