File size: 3,579 Bytes
98b5314
7f93957
 
1b44681
7f93957
1b44681
7f93957
 
 
 
b47f6e6
 
 
 
7f93957
aa5cf68
 
 
 
7f93957
 
 
 
 
 
 
7ab8364
4516d66
 
 
 
 
 
 
 
 
 
 
 
 
 
9869d32
752ff58
03de940
 
 
9869d32
bad025d
 
 
 
 
4516d66
 
 
 
03de940
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afc99ad
4516d66
 
9869d32
4516d66
 
bad025d
4516d66
 
07e0994
4516d66
bad025d
07e0994
1b44681
07e0994
bad025d
1fa0886
 
 
 
 
 
 
 
 
bad025d
07e0994
1b44681
1fa0886
 
4516d66
 
 
1b44681
4516d66
 
7ab8364
98b5314
7f93957
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import time
import random
import gradio as gr
from transformers import pipeline

# Speech-to-text pipeline, created once at import time and reused by
# transcribe() for every recording.
p = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

def user(user_message, history):
    """Queue the user's message as a new, not-yet-answered chat turn.

    Args:
        user_message: Text the user just submitted.
        history: Chat history as a list of ``[user_text, bot_text]`` pairs.

    Returns:
        A 2-tuple ``("", new_history)``. The empty string is the new value
        for the prompt textbox; ``new_history`` is a fresh list equal to
        ``history`` plus ``[user_message, None]`` (the input list is not
        mutated).
    """
    pending_turn = [user_message, None]
    return "", history + [pending_turn]

# def transcribe(audio, state=""):
#     text = p(audio)["text"]
#     state += text + " "
#     return state, state

def transcribe(audio):
    """Convert a recorded audio clip to text with the module-level ASR pipeline.

    Args:
        audio: The recording to transcribe, as accepted by the pipeline ``p``
            (the recorder component is configured with ``type="filepath"``).
            ``None`` or an empty string means nothing has been recorded.

    Returns:
        The recognised text, or ``""`` when there is no recording.
    """
    # The submit button can be clicked before anything was recorded; skip the
    # model in that case instead of letting the pipeline raise on empty input.
    if audio is None or (isinstance(audio, str) and not audio):
        return ""
    return p(audio)["text"]

def bot(history):
    """Stream a randomly chosen canned reply into the latest chat turn.

    The reply is revealed one character at a time: after each new character
    the function pauses briefly and yields the (mutated in place) history so
    the UI can show a typing effect.

    Args:
        history: Chat history as a list of ``[user_text, bot_text]`` pairs;
            the bot slot of the last pair is overwritten.

    Yields:
        The same ``history`` object after each additional character.
    """
    canned_replies = ["How are you?", "I love you", "I'm very hungry"]
    reply = random.choice(canned_replies)
    history[-1][1] = ""
    for shown in range(1, len(reply) + 1):
        # Show one more character of the reply than on the previous step.
        history[-1][1] = reply[:shown]
        time.sleep(0.05)
        yield history

# Custom stylesheet handed to gr.Blocks(css=...). It sets the page font,
# renders buttons white-on-black, and centres/spaces the elements whose
# elem_id is "chatbot" and "prompt-container".
css = """
        .gradio-container {
            font-family: 'IBM Plex Sans', sans-serif;
        }
        .gr-button {
            color: white;
            border-color: black;
            background: black;
        }
        .container {
            max-width: 730px;
            margin: auto;
            padding-top: 1.5rem;
        }
        #chatbot {
            min-height: 30rem;
            margin-bottom: 15px;
            margin-left: auto;
            margin-right: auto;
        }
        #prompt-container {
            margin-bottom: 15px;
            margin-left: auto;
            margin-right: auto;
        }
"""

# Build the UI: a title banner, the chat window, a text prompt with a Send
# button, and a microphone recorder whose transcript is written into the prompt.
with gr.Blocks(css=css) as demo:

    gr.HTML(
        """
            <div style="text-align: center; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
                  Interview with AI (Really?)
                </h1>
              </div>
            </div>
        """
    )

    with gr.Box():

        chatbot = gr.Chatbot([], show_label=False, elem_id="chatbot").style(height="auto")

        # Text prompt row: textbox on the left, Send button on the right.
        with gr.Row(elem_id="prompt-container").style(mobile_collapse=False, equal_height=True):
            with gr.Column(scale=0.8):
                txt = gr.Textbox(
                    show_label=False,
                    placeholder="Type and press enter, or record your response...",
                ).style(container=False)
            with gr.Column(scale=0.2, min_width=0):
                send = gr.Button("Send")

        # Audio row: the recorder starts hidden and is toggled by action_btn.
        with gr.Row(elem_id="audio-container").style(mobile_collapse=False, equal_height=True):
            with gr.Column(scale=0.8):
                recorder = gr.Audio(source="microphone", type="filepath", show_label=False, visible=False).style(container=False)

                # The module is imported as `gr`; the name `gradio` is not
                # defined in this file, so it must not be used here.
                action_btn = gr.Button('Start')

                def next_line(action, _):
                    """Advance the recorder toggle button.

                    Args:
                        action: Current label of ``action_btn``.
                        _: Current recorder value; unused, but supplied
                            because the recorder is listed as an input.

                    Returns:
                        A dict keyed by component: from ``'Start'`` the button
                        becomes ``'Next'`` and the recorder is shown; from any
                        other label the button becomes ``'Done'`` and the
                        recorder is hidden.
                    """
                    if action == 'Start':
                        return {action_btn: 'Next', recorder: gr.update(visible=True)}
                    else:
                        return {action_btn: 'Done', recorder: gr.update(visible=False)}

            with gr.Column(scale=0.2, min_width=0):
                speech = gr.Button("Submit speech")

        action_btn.click(next_line, inputs=[action_btn, recorder], outputs=[action_btn, recorder])
        # Transcribe the recording into the prompt textbox.
        speech.click(transcribe, inputs=recorder, outputs=txt)
        # Pressing enter and clicking Send do the same thing: append the user
        # turn right away (queue=False), then stream the bot reply into it.
        txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )
        send.click(user, [txt, chatbot], [txt, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )

# Enable the request queue first (bot() is a generator handler that streams
# its output), then start the web server.
demo.queue().launch()