File size: 4,366 Bytes
361479c
0013d95
bb7af57
4f8f8b7
361479c
bb7af57
0013d95
bb7af57
361479c
bb7af57
e2409e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361479c
 
 
0013d95
361479c
3dacc3e
361479c
 
 
 
 
 
 
 
3dacc3e
361479c
1a81701
 
 
3dacc3e
361479c
3dacc3e
 
 
 
 
 
 
 
 
 
 
bb7af57
361479c
bb7af57
361479c
 
 
 
 
3dacc3e
bb7af57
 
 
 
361479c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a81701
 
 
 
361479c
 
4f8f8b7
e6c597c
 
 
 
1a81701
361479c
 
 
 
 
1a81701
361479c
0013d95
 
361479c
bb7af57
361479c
 
 
 
 
bb7af57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import logging
import gradio as gr
import openai
from constants import *
import string


# Configure the OpenAI client at import time.
# NOTE(review): OPENAI_API_KEY and OPENAI_ORGANIZATION are not defined in this
# file; presumably they come from `from constants import *` - confirm.
openai.api_key = OPENAI_API_KEY
openai.organization = OPENAI_ORGANIZATION

# Heading passed to the Gradio interface as its title.
title = "Car Seats Voice Commands"

# Markdown text passed to the Gradio interface as its description.
description = """
This is a demo for controlling car seats with Voice Commands, On the left there's the inputs section
and on the right you'll find your outputs. For the inputs you have two choices **Voice** and **Text**,
Use **Voice** If you want a closer experience to the final product, Or use **Text** if you just want to test the command model.
for the outputs you have the **transcription**(Please check that it's accurate), **command**(to know which
command the system detected) and you have the robot voice (again use this if you want a more real experience).

**Features** : You can either activate or deactivate the following features 
- Heated Seats 
- Cooled Seats 
- Massage Seats 

Examples:
- **Direct Commands** : Try to say something like "Activate heated seats" or "Turn Off massage seats"
- **Indirect Commands** : Try "My back is cold" , "No heating is needed anymore" or "I'm stressed today"
"""

# Longer explanatory text about how the demo works.
# NOTE(review): not referenced by the gr.Interface call visible in this file.
article = """
This demo processes commands in two steps, the first step is the transcription phase and the second is the
Command Classification phase. For Transcription I used The OpenAi whisper model, and for the classification 
I Fine-Tuned the OpenAi **ada** model on Car Seats Command.
"""

def remove_punctuation(input_string):
    """
    Return a copy of input_string without any ASCII punctuation characters
    (those listed in string.punctuation); all other characters are kept.
    """
    return "".join(
        char for char in input_string if char not in string.punctuation
    )

# Numeric command codes mapped to the seat action each one stands for.
# get_command() looks the code parsed from the model reply up in this table.
id2label = {
    1: "massage_seats_on",
    2: "massage_seats_off",
    3: "heated_seats_on",
    4: "heated_seats_off",
    5: "cooled_seats_on",
    6: "cooled_seats_off",
}

def get_command(command, id2label, model="text-davinci-003"):
    """
    Classify a user command into one of the car-seat actions.

    A prompt listing the available command codes is sent to the OpenAI
    completion API; the numeric code in the reply is mapped back to its label.

    Args:
        command: The user's utterance; it is inserted verbatim into the prompt.
        id2label: Mapping from integer command code to label. It is used both
            to list the options in the prompt and to decode the model reply.
        model: Name of the OpenAI completion model to query.

    Returns:
        The label for the code the model answered with, or "unknown" when the
        reply is not a number or is not a key of id2label.
    """
    # Build the option list from id2label so the prompt and the decoding table
    # can never drift apart (the separator keeps the prompt's 4-space indent).
    mapping_lines = "\n    ".join(
        f'{code}: "{label}"' for code, label in id2label.items()
    )
    prompt = f"""
    We want to control the seats of a car which has features to cool, heat, or massage a seat. The user said "{command}", Which feature we should use to ensure user comfort? Give just the number of the feature without any punctuation.
    Mapping:
    {mapping_lines}

    Command_Code:
    """

    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=2, temperature=0
    )
    raw_answer = completion["choices"][0]["text"]

    print("result")
    print(raw_answer.strip())

    # The model may answer with something that is not a number (or nothing at
    # all); treat that like an unmapped code instead of crashing the request.
    try:
        command_id = int(remove_punctuation(raw_answer).strip())
    except ValueError:
        return "unknown"

    return id2label.get(command_id, "unknown")


def command_tokens(command, model="text-davinci-003"):
    """
    Label every word of the command as "unit", "value" or "none".

    The words are sent to the OpenAI completion API, which is asked to return
    one label per word; the reply is parsed and paired with the words.

    Args:
        command: The text to label; it is split on whitespace into words.
        model: Name of the OpenAI completion model to query.

    Returns:
        A list of (word, label) tuples with exactly one entry per word. Words
        for which the model returned no label are labelled "none".
    """
    # split() without an argument ignores repeated/leading/trailing spaces, so
    # no empty "words" are sent to the model or returned to the caller.
    words = command.split()

    prompt = f"""
        Give an array of the same length of the input, for every element of the returned array use  one of the labels in the label-list

        label-list :
        - unit if belongs to the International System of Units
        - value
        - none if none of the above

        input : [{",".join(words)}]
        output :
    """

    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=128, temperature=0
    )
    raw_labels = completion["choices"][0]["text"].strip()

    # Drop the list brackets, quotes and surrounding whitespace of each item so
    # that e.g. " 'value'" and "value" end up as the same label.
    labels = [
        piece.strip(" \t\r\n[]'\"") or "none"
        for piece in raw_labels.split(",")
    ]

    # Force one label per word: cut surplus labels, pad missing ones with
    # "none" (plain zip would silently drop the unlabelled trailing words).
    labels = labels[:len(words)] + ["none"] * (len(words) - len(labels))

    return list(zip(words, labels))


def transcribe(audio):
    """
    Transcribe a recorded command and classify it.

    The audio file is transcribed with the OpenAI "whisper-1" model, then the
    transcription is passed to get_command() and command_tokens().

    Args:
        audio: Path of the audio file to transcribe, or None/empty when no
            recording was supplied.

    Returns:
        A tuple (command, tokens): the detected command label and the list of
        (word, label) pairs for the transcription. When no audio is supplied
        the result is ("unknown", []).
    """
    # Nothing was recorded: there is nothing to open or send to the API.
    if not audio:
        return "unknown", []

    # getting text transcription; the context manager closes the file even if
    # the API call fails
    with open(audio, "rb") as audio_file:
        response = openai.Audio.transcribe("whisper-1", audio_file, language="en")
    transcription = response["text"]

    result = get_command(transcription, id2label)
    tokens = command_tokens(transcription)

    print("result", result)
    print("tokens", tokens)

    return result, tokens


if __name__ == "__main__":
    # Build the demo UI: microphone recording in, detected command (text) and
    # labelled words (highlight) out, then start serving it.
    demo = gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=["text", "highlight"],
        title=title,
        description=description,
    )
    demo.launch()