"""Voice chat demo: record a question, transcribe it with Whisper,
answer it with GPT-3, and speak the reply through a local TTS engine."""

import os
import sys
from subprocess import call

import openai
import gradio as gr

# Read the key from the environment rather than hardcoding a secret in source.
openai.api_key = os.environ["OPENAI_API_KEY"]


def transcribe(audio):
    """Transcribe the recorded audio file with the Whisper API."""
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]
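
# Note: the hosted Whisper endpoint caps uploads at 25 MB, which is more than
# enough for short microphone clips recorded through the Gradio widget.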


def generate_response(transcribed_text):
    """Generate a reply to the transcribed question via a GPT-3 completion."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=transcribed_text,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text
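
# Note: openai.Completion and text-davinci-003 belong to the legacy, pre-1.0
# openai SDK; under openai>=1.0 this call would need to be rewritten against
# the newer client interface.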


def run_cmd(command):
    """Run an external command, exiting cleanly if the user interrupts it."""
    try:
        print(command)
        call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)
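
# Because the command is passed as an argument list, subprocess runs it without
# a shell, so arbitrary text in the model's reply cannot inject shell commands.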


def inference(text):
    """Synthesize speech for `text` with the external `tts` CLI, which
    writes its result to tts_output.wav by default."""
    cmd = ['tts', '--text', text]
    run_cmd(cmd)
    return 'tts_output.wav'
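
# Caveat: every call overwrites the same tts_output.wav, so concurrent requests
# would clobber one another. Passing a unique path per request via the CLI's
# --out_path flag, e.g. ['tts', '--text', text, '--out_path', path], would
# avoid that.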


def process_audio_and_respond(audio):
    """Full pipeline: speech -> text -> GPT-3 reply -> synthesized speech."""
    text = transcribe(audio)
    response_text = generate_response(text)
    output_file = inference(response_text)
    return output_file


demo = gr.Blocks()

with demo:
    # gr.inputs/gr.outputs are the deprecated Interface-era wrappers; inside
    # Blocks, use the gr.Audio component directly (Gradio 3.x API).
    audio_file = gr.Audio(source="microphone", type="filepath")
    button = gr.Button("Uliza Swali")  # Swahili for "Ask a Question"
    outputs = gr.Audio(type="filepath", label="Output Audio")

    # The click handler must be registered inside the Blocks context.
    button.click(fn=process_audio_and_respond, inputs=audio_file, outputs=outputs)

demo.launch()
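
# To run locally (assuming the legacy openai<1.0 SDK, Gradio 3.x, and Coqui TTS):
#   pip install "openai<1" "gradio<4" TTS
#   export OPENAI_API_KEY="sk-..."
#   python app.py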