# Voice_Commands / app.py
# (Hugging Face file-page residue preserved as a comment: author "zinoubm",
#  commit message "final touches", commit 361479c)
import logging
import gradio as gr
import openai
from constants import *
import string
# Configure the OpenAI client from constants (star-imported above); credentials
# live in constants.py so they stay out of this source file.
openai.api_key = OPENAI_API_KEY
openai.organization = OPENAI_ORGANIZATION
# UI copy shown by the Gradio interface. Fix: "activate of deactivate" was a
# typo in user-facing text; corrected to "activate or deactivate".
title = "Car Seats Voice Commands"

description = """
This is a demo for controlling car seats with Voice Commands, On the left there's the inputs section
and on the right you'll find your outputs. For the inputs you have two choices **Voice** and **Text**,
Use **Voice** If you want a closer experience to the final product, Or use **Text** if you just want to test the command model.
for the outputs you have the **transcription**(Please check that it's accurate), **command**(to know which
command the system detected) and you have the robot voice (again use this if you want a more real experience).

**Features** : You can either activate or deactivate the following features

- Heated Seats
- Cooled Seats
- Massage Seats

Examples:

- **Direct Commands** : Try to say something like "Activate heated seats" or "Turn Off massage seats"
- **Indirect Commands** : Try "My back is cold" , "No heating is needed anymore" or "I'm stressed today"
"""

article = """
This demo processes commands in two steps, the first step is the transcription phase and the second is the
Command Classification phase. For Transcription I used The OpenAi whisper model, and for the classification
I Fine-Tuned the OpenAi **ada** model on Car Seats Command.
"""
def remove_punctuation(input_string):
    """Return *input_string* with every ASCII punctuation character removed."""
    # str.translate performs the deletion in a single C-level pass.
    return input_string.translate(str.maketrans("", "", string.punctuation))
# Numeric feature code (as emitted by the classifier prompt) -> command label.
id2label = dict(
    enumerate(
        (
            "massage_seats_on",
            "massage_seats_off",
            "heated_seats_on",
            "heated_seats_off",
            "cooled_seats_on",
            "cooled_seats_off",
        ),
        start=1,
    )
)
def get_command(command, id2label, model="text-davinci-003"):
    """
    Classify a natural-language seat command into one of the known features.

    Args:
        command: transcribed user utterance, e.g. "Activate heated seats".
        id2label: mapping from numeric feature code to label string.
        model: OpenAI completion model used for the classification.

    Returns:
        The label string for the detected feature, or "unknown" when the
        model's answer is not a number or not a code present in id2label.
    """
    prompt = f"""
We want to control the seats of a car which has features to cool, heat, or massage a seat. The user said "{command}", Which feature we should use to ensure user comfort? Give just the number of the feature without any punctuation.

Mapping:

1: "massage_seats_on"
2: "massage_seats_off"
3: "heated_seats_on"
4: "heated_seats_off"
5: "cooled_seats_on"
6: "cooled_seats_off"

Command_Code:
"""
    # temperature=0 keeps the classification deterministic; two tokens are
    # enough for a single-digit answer plus whitespace.
    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=2, temperature=0
    )
    raw = completion["choices"][0]["text"]
    logging.debug("classification raw output: %s", raw.strip())
    # Fix: the model can return non-numeric text; int() previously raised an
    # uncaught ValueError and crashed the request. Fall back to "unknown".
    try:
        code = int(remove_punctuation(raw).strip())
    except ValueError:
        return "unknown"
    return id2label.get(code, "unknown")
def command_tokens(command, model="text-davinci-003"):
    """
    Label each word of *command* via the OpenAI completion API.

    Args:
        command: space-separated user utterance.
        model: OpenAI completion model used for token labelling.

    Returns:
        A list of (word, label) pairs where label is one of
        "unit", "value" or "none" as produced by the model.
    """
    prompt = f"""
Give an array of the same length of the input, for every element of the returned array use one of the labels in the label-list

label-list :
- unit if belongs to the International System of Units
- value
- none if none of the above

input : [{",".join(command.split(" "))}]
output :
"""
    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=128, temperature=0
    )
    result = completion["choices"][0]["text"].strip()
    # Fix: splitting on "," left leading/trailing whitespace on each label
    # (e.g. " value"), which leaked into the highlight widget; strip each one.
    labels = [
        label.strip()
        for label in result.replace("[", "").replace("]", "").replace("'", "").split(",")
    ]
    # zip truncates to the shorter sequence if the model misbehaves and
    # returns a different number of labels than there are words.
    return list(zip(command.split(" "), labels))
def transcribe(audio):
    """
    Transcribe a recorded audio file and classify the spoken command.

    Args:
        audio: filesystem path to the recorded audio (Gradio type="filepath").

    Returns:
        A tuple of (command label, list of (word, label) token pairs).
    """
    # Fix: the file handle was opened and never closed; `with` guarantees it
    # is released even if the API call raises.
    with open(audio, "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", audio_file, language="en")
    text = transcription["text"]
    result = get_command(text, id2label)
    tokens = command_tokens(text)
    logging.debug("result=%s tokens=%s", result, tokens)
    return result, tokens
if __name__ == "__main__":
    # Launch the Gradio demo: microphone audio in, (command text, token
    # highlights) out. Fix: `article` was defined at module level but never
    # passed to the interface; wire it in so it renders below the demo.
    gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=["text", "highlight"],
        title=title,
        description=description,
        article=article,
    ).launch()