Spaces:

zinoubm
/

Voice_Commands

Runtime error

File size: 4,366 Bytes

import logging
import gradio as gr
import openai
from constants import *
import string


openai.api_key = OPENAI_API_KEY
openai.organization = OPENAI_ORGANIZATION

title = "Car Seats Voice Commands"

description = """
This is a demo for controlling car seats with Voice Commands, On the left there's the inputs section
and on the right you'll find your outputs. For the inputs you have two choices **Voice** and **Text**,
Use **Voice** If you want a closer experience to the final product, Or use **Text** if you just want to test the command model.
for the outputs you have the **transcription**(Please check that it's accurate), **command**(to know which
command the system detected) and you have the robot voice (again use this if you want a more real experience).

**Features** : You can either activate of deactivate the following features 
- Heated Seats 
- Cooled Seats 
- Massage Seats 

Examples:
- **Direct Commands** : Try to say something like "Activate heated seats" or "Turn Off massage seats"
- **Indirect Commands** : Try "My back is cold" , "No heating is needed anymore" or "I'm stressed today"
"""

article = """
This demo processes commands in two steps, the first step is the transcription phase and the second is the
Command Classification phase. For Transcription I used The OpenAi whisper model, and for the classification 
I Fine-Tuned the OpenAi **ada** model on Car Seats Command.
"""

def remove_punctuation(input_string):
    translator = str.maketrans('', '', string.punctuation)
    clean_string = input_string.translate(translator)

    return clean_string

id2label = {
    1:"massage_seats_on",
    2:"massage_seats_off",
    3:"heated_seats_on",
    4:"heated_seats_off",
    5:"cooled_seats_on",
    6:"cooled_seats_off"
}

def get_command(command, id2label, model = "text-davinci-003"):
    """
    This function get the classification outputs from openai API
    """
    prompt = f"""
    We want to control the seats of a car which has features to cool, heat, or massage a seat. The user said "{command}", Which feature we should use to ensure user comfort? Give just the number of the feature without any punctuation.
    Mapping:
    1: "massage_seats_on"
    2: "massage_seats_off"
    3: "heated_seats_on"
    4: "heated_seats_off"
    5: "cooled_seats_on"
    6: "cooled_seats_off"

    Command_Code:
    """

    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=2, temperature=0
    )

    print("result")
    print(completion["choices"][0]["text"].strip())

    id = int(remove_punctuation(completion["choices"][0]["text"]).strip())

    result = id2label[id] if id in id2label else "unknown"
    return result


def command_tokens(command, model = "text-davinci-003"):
    """
    This function get the classification outputs from openai API
    """
    prompt = f"""
        Give an array of the same length of the input, for every element of the returned array use  one of the labels in the label-list

        label-list :
        - unit if belongs to the International System of Units
        - value
        - none if none of the above

        input : [{",".join(command.split(" "))}]
        output :
    """

    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=128, temperature=0
    )

    result = completion["choices"][0]["text"].strip()

    result_list = result.replace("[", "").replace("]", "").replace("'", "").split(',')

    return list(zip(command.split(" "), result_list))


def transcribe(audio):
    """
    if text provided the function will classify the input directly.
    if not the audio will be transcribed then the transcription will be classified.

    return a tuple of (transcription, command, audio to be played)
    """

    # getting text transcription
    audio_file = open(audio, "rb")
    transcription = openai.Audio.transcribe("whisper-1", audio_file, language="en")
    transcription = transcription["text"]

    result = get_command(transcription, id2label)
    tokens = command_tokens(transcription)

    print("result", result)
    print("tokens", tokens)

    return result, tokens


if __name__=="__main__":
    gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=["text", "highlight"],
    title=title,
    description=description
    ).launch()