File size: 2,045 Bytes
6c9dde7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import time
import numpy as np
import gradio as gr
import random

from fastrtc import AdditionalOutputs, WebRTC, ReplyOnPause

# Canned replies the demo "transcriber" draws from at random:
# a couple of greetings, some small-talk questions, then favorites.
possible_responses = (
    ["hello", "hi"]
    + ["how's it going?", "what's up?", "how's your day?", "how are you?"]
    + [
        "what's your name?",
        "where are you from?",
        "what do you do?",
        "what's your favorite color?",
        "what's your favorite food?",
        "what's your favorite movie?",
    ]
)


def transcribe(audio: tuple[int, np.ndarray]):
    """Fake transcription handler for the fastrtc stream.

    Ignores the incoming ``(sample_rate, samples)`` audio chunk, sleeps one
    second to simulate model latency, then yields a single randomly chosen
    assistant message wrapped in ``AdditionalOutputs`` so it reaches the
    ``on_additional_outputs`` callback rather than the audio stream itself.
    """
    time.sleep(1)  # simulate inference latency
    reply = {"role": "assistant", "content": random.choice(possible_responses)}
    yield AdditionalOutputs([reply])


with gr.Blocks() as demo:
    gr.HTML(
        """
    TEST
    """
    )

    # ICE/STUN settings for the browser-side peer connection.
    client_rtc_config = {
        "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}],
        "iceTransportPolicy": "all",
        "iceCandidatePoolSize": 10,
        "bundlePolicy": "max-bundle",
        "rtcpMuxPolicy": "require",
        "sdpSemantics": "unified-plan",
    }
    # STUN-only config for the server-side peer connection.
    server_rtc_config = {
        "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
    }

    with gr.Row():
        with gr.Column():
            audio = WebRTC(
                label="Stream",
                mode="send",
                modality="audio",
                rtc_configuration=client_rtc_config,
                server_rtc_configuration=server_rtc_config,
            )
        with gr.Column():
            transcript = gr.Chatbot(label="transcript", type="messages")

    # Run the fake transcriber whenever the speaker pauses.
    audio.stream(
        ReplyOnPause(transcribe), inputs=[audio], outputs=[audio], time_limit=180
    )

    def handle_additional_outputs(outputs):
        """Log the AdditionalOutputs payload and forward it to the chatbot."""
        print(f"outputs: {outputs}")
        return outputs

    audio.on_additional_outputs(
        fn=handle_additional_outputs,
        outputs=[transcript],
        concurrency_limit=10,
        queue=False,
        show_progress="hidden",
    )

    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)