# Voice_Commands / app.py
# (Hugging Face file-page residue preserved as a comment: author "zinoubm",
#  commit message "final touches", commit 361479c)
import logging
import gradio as gr
import openai
from constants import *
import string
# Configure the OpenAI client from constants (star-imported above); credentials
# live in constants.py so they stay out of this source file.
openai.api_key = OPENAI_API_KEY
openai.organization = OPENAI_ORGANIZATION
# UI copy shown by the Gradio interface. Fix: "activate of deactivate" was a
# typo in user-facing text; corrected to "activate or deactivate".
title = "Car Seats Voice Commands"

description = """
This is a demo for controlling car seats with Voice Commands, On the left there's the inputs section
and on the right you'll find your outputs. For the inputs you have two choices **Voice** and **Text**,
Use **Voice** If you want a closer experience to the final product, Or use **Text** if you just want to test the command model.
for the outputs you have the **transcription**(Please check that it's accurate), **command**(to know which
command the system detected) and you have the robot voice (again use this if you want a more real experience).

**Features** : You can either activate or deactivate the following features

- Heated Seats
- Cooled Seats
- Massage Seats

Examples:

- **Direct Commands** : Try to say something like "Activate heated seats" or "Turn Off massage seats"
- **Indirect Commands** : Try "My back is cold" , "No heating is needed anymore" or "I'm stressed today"
"""

article = """
This demo processes commands in two steps, the first step is the transcription phase and the second is the
Command Classification phase. For Transcription I used The OpenAi whisper model, and for the classification
I Fine-Tuned the OpenAi **ada** model on Car Seats Command.
"""
def remove_punctuation(input_string):
    """Return *input_string* with every ASCII punctuation character removed."""
    # str.translate performs the deletion in a single C-level pass.
    return input_string.translate(str.maketrans("", "", string.punctuation))
# Numeric feature code (as emitted by the classifier prompt) -> command label.
id2label = dict(
    enumerate(
        (
            "massage_seats_on",
            "massage_seats_off",
            "heated_seats_on",
            "heated_seats_off",
            "cooled_seats_on",
            "cooled_seats_off",
        ),
        start=1,
    )
)
def get_command(command, id2label, model="text-davinci-003"):
    """
    Classify a natural-language seat command into one of the known features.

    Args:
        command: transcribed user utterance, e.g. "Activate heated seats".
        id2label: mapping from numeric feature code to label string.
        model: OpenAI completion model used for the classification.

    Returns:
        The label string for the detected feature, or "unknown" when the
        model's answer is not a number or not a code present in id2label.
    """
    prompt = f"""
We want to control the seats of a car which has features to cool, heat, or massage a seat. The user said "{command}", Which feature we should use to ensure user comfort? Give just the number of the feature without any punctuation.

Mapping:

1: "massage_seats_on"
2: "massage_seats_off"
3: "heated_seats_on"
4: "heated_seats_off"
5: "cooled_seats_on"
6: "cooled_seats_off"

Command_Code:
"""
    # temperature=0 keeps the classification deterministic; two tokens are
    # enough for a single-digit answer plus whitespace.
    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=2, temperature=0
    )
    raw = completion["choices"][0]["text"]
    logging.debug("classification raw output: %s", raw.strip())
    # Fix: the model can return non-numeric text; int() previously raised an
    # uncaught ValueError and crashed the request. Fall back to "unknown".
    try:
        code = int(remove_punctuation(raw).strip())
    except ValueError:
        return "unknown"
    return id2label.get(code, "unknown")
def command_tokens(command, model="text-davinci-003"):
    """
    Label each word of *command* via the OpenAI completion API.

    Args:
        command: space-separated user utterance.
        model: OpenAI completion model used for token labelling.

    Returns:
        A list of (word, label) pairs where label is one of
        "unit", "value" or "none" as produced by the model.
    """
    prompt = f"""
Give an array of the same length of the input, for every element of the returned array use one of the labels in the label-list

label-list :
- unit if belongs to the International System of Units
- value
- none if none of the above

input : [{",".join(command.split(" "))}]
output :
"""
    completion = openai.Completion.create(
        model=model, prompt=prompt, max_tokens=128, temperature=0
    )
    result = completion["choices"][0]["text"].strip()
    # Fix: splitting on "," left leading/trailing whitespace on each label
    # (e.g. " value"), which leaked into the highlight widget; strip each one.
    labels = [
        label.strip()
        for label in result.replace("[", "").replace("]", "").replace("'", "").split(",")
    ]
    # zip truncates to the shorter sequence if the model misbehaves and
    # returns a different number of labels than there are words.
    return list(zip(command.split(" "), labels))
def transcribe(audio):
    """
    Transcribe a recorded audio file and classify the spoken command.

    Args:
        audio: filesystem path to the recorded audio (Gradio type="filepath").

    Returns:
        A tuple of (command label, list of (word, label) token pairs).
    """
    # Fix: the file handle was opened and never closed; `with` guarantees it
    # is released even if the API call raises.
    with open(audio, "rb") as audio_file:
        transcription = openai.Audio.transcribe("whisper-1", audio_file, language="en")
    text = transcription["text"]
    result = get_command(text, id2label)
    tokens = command_tokens(text)
    logging.debug("result=%s tokens=%s", result, tokens)
    return result, tokens
if __name__ == "__main__":
    # Launch the Gradio demo: microphone audio in, (command text, token
    # highlights) out. Fix: `article` was defined at module level but never
    # passed to the interface; wire it in so it renders below the demo.
    gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=["text", "highlight"],
        title=title,
        description=description,
        article=article,
    ).launch()