File size: 2,182 Bytes
5c75cf0
91773b9
5c75cf0
 
91773b9
 
 
 
 
 
 
 
 
5c75cf0
 
 
 
 
 
 
91773b9
5c75cf0
 
91773b9
5c75cf0
 
91773b9
5c75cf0
 
91773b9
 
 
 
 
5c75cf0
 
91773b9
 
5c75cf0
 
 
 
 
 
 
91773b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""Deploying AI Voice Chatbot Gradio App."""
import gradio as gr
from typing import Tuple

from utils import (
    TextGenerationPipeline,
    from_en_translation,
    html_audio_autoplay,
    stt,
    to_en_translation,
    tts,
    tts_to_bytesio,
)

# Language code that main() hands to the speech-to-text, translation and
# text-to-speech helpers.
desired_language: str = "de"
# Forwarded as ``max_length`` to the response generator below.
max_answer_length: int = 100
# Built once at import time and reused by main() for every request.
response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)


def main(audio: object) -> Tuple[str, str, str, object]:
    """Run one voice-chat turn: spoken input in, text and spoken reply out.

    The recording is transcribed, translated to English, answered by the
    response generator, translated back to ``desired_language`` and finally
    synthesized to speech that is wrapped in an autoplaying HTML snippet.

    Args:
        audio (object): Recorded speech of the user, or ``None`` when the
            user submitted without recording or uploading anything.

    Returns:
        tuple containing:
        - user_speech_text (str): Recognized speech.
        - bot_response_de (str): Translated answer of the bot.
        - bot_response_en (str): Bot's original answer.
        - html (object): Autoplayer for bot's speech.

        When ``audio`` is ``None`` all four entries are empty strings, so
        the UI outputs are simply cleared.
    """
    # Gradio hands the callback None when the Audio component is empty;
    # skip the pipeline instead of pushing None into the speech recognizer.
    if audio is None:
        return "", "", "", ""

    user_speech_text = stt(audio, desired_language)
    translated_text = to_en_translation(user_speech_text, desired_language)
    bot_response_en = response_generator_pipe(translated_text)
    bot_response_de = from_en_translation(bot_response_en, desired_language)
    bot_voice = tts(bot_response_de, desired_language)
    bot_voice_bytes = tts_to_bytesio(bot_voice)
    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_de, bot_response_en, html


# Build the Gradio UI: an input row, a row of read-only transcripts, and an
# HTML slot for the value main() returns as its autoplayer.
with gr.Blocks() as demo:
    gr.Markdown("## AI Voice Chatbot")

    # Input row: the audio is delivered to the callback as a file path.
    with gr.Row():
        audio_input = gr.Audio(label="Speak or Upload Audio", type="filepath")
        submit_btn = gr.Button("Submit")

    # Output row: none of these boxes are editable by the user.
    with gr.Row():
        user_speech_text = gr.Textbox(interactive=False, label="You said:")
        bot_response_de = gr.Textbox(interactive=False, label="AI said (in German):")
        bot_response_en = gr.Textbox(interactive=False, label="AI said (in English):")

    html_output = gr.HTML()

    # Order must match the tuple returned by main().
    result_components = [
        user_speech_text,
        bot_response_de,
        bot_response_en,
        html_output,
    ]
    submit_btn.click(fn=main, inputs=[audio_input], outputs=result_components)

# Start serving the app with debug mode enabled.
demo.launch(debug=True)