freddyaboulton HF Staff commited on
Commit
1b654d3
·
verified ·
1 Parent(s): 25139f5

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. app.py +118 -0
  3. index.html +569 -0
  4. requirements.txt +6 -0
README.md CHANGED
@@ -9,6 +9,7 @@ app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: Compare Round Trip Times between WebRTC and Websockets
 
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
9
  pinned: false
10
  license: mit
11
  short_description: Compare Round Trip Times between WebRTC and Websockets
12
+ tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|ELEVENLABS_API_KEY, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY]
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+
3
+ import numpy as np
4
+ import gradio as gr
5
+ from gradio.utils import get_space
6
+ from fastrtc import ReplyOnPause, Stream, AdditionalOutputs, get_twilio_turn_credentials
7
+ from fastrtc.utils import audio_to_bytes, aggregate_bytes_to_16bit
8
+ from pathlib import Path
9
+ from fastapi.responses import HTMLResponse, StreamingResponse
10
+ from groq import Groq
11
+ import anthropic
12
+ from elevenlabs import ElevenLabs
13
+ import os
14
+ from pydantic import BaseModel
15
+ import json
16
+
17
+
18
load_dotenv()

# API clients: Groq transcribes speech, Claude generates the reply,
# ElevenLabs synthesizes it back to audio.
groq_client = Groq()
claude_client = anthropic.Anthropic()
tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])

# Directory containing this file; used to locate index.html at request time.
curr_dir = Path(__file__).parent
25
+
26
+
27
def response(
    audio: tuple[int, np.ndarray],
    chatbot: list[dict] | None = None,
):
    """Voice-chat handler: transcribe -> chat -> synthesize.

    Transcribes ``audio`` with Groq Whisper, appends the exchange to the
    chat history, asks Claude for a reply, then streams the reply back as
    24 kHz 16-bit PCM chunks.

    Args:
        audio: ``(sample_rate, samples)`` tuple supplied by fastrtc.
        chatbot: Prior history as ``{"role": ..., "content": ...}`` dicts.

    Yields:
        ``AdditionalOutputs(chatbot)`` once (the updated history), then
        ``(24000, np.ndarray, "mono")`` audio chunks.
    """
    chatbot = chatbot or []
    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
    prompt = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes(audio)),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text
    print("prompt", prompt)
    chatbot.append({"role": "user", "content": prompt})
    messages.append({"role": "user", "content": prompt})
    # Renamed from `response` — the original shadowed this function's own
    # name, which breaks any later reference to the function inside its body.
    claude_response = claude_client.messages.create(
        model="claude-3-5-haiku-20241022",
        max_tokens=512,
        messages=messages,  # type: ignore
    )
    response_text = " ".join(
        block.text  # type: ignore
        for block in claude_response.content
        if getattr(block, "type", None) == "text"
    )
    chatbot.append({"role": "assistant", "content": response_text})
    # Push the updated history to the UI before audio starts streaming.
    yield AdditionalOutputs(chatbot)
    iterator = tts_client.text_to_speech.convert_as_stream(
        text=response_text,
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        model_id="eleven_multilingual_v2",
        output_format="pcm_24000",
    )
    for chunk in aggregate_bytes_to_16bit(iterator):
        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
        yield (24000, audio_array, "mono")
62
+
63
+
64
# Chatbot component doubles as extra input (history in) and extra output
# (history out) for the stream handler.
chatbot = gr.Chatbot(type="messages")

# NOTE(review): TURN credentials are only fetched when get_space() returns
# exactly "gradio" — confirm that is the intended space check.
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=ReplyOnPause(response),
    additional_outputs_handler=lambda a, b: b,
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    rtc_configuration=(
        get_twilio_turn_credentials() if get_space() == "gradio" else None
    ),
)
76
+
77
+
78
class Message(BaseModel):
    """One chat message exchanged with the model."""

    role: str  # "user" or "assistant" (the roles the handler appends)
    content: str  # plain-text message body
81
+
82
+
83
class InputData(BaseModel):
    """Payload POSTed to /input_hook to seed the handler's chat history."""

    webrtc_id: str  # id correlating this request with an active connection
    chatbot: list[Message]  # full chat history held by the client
86
+
87
+
88
@stream.get("/")
async def _():
    """Serve the benchmark page with the RTC configuration inlined.

    Fix: the non-Space fallback was the *string* ``"{}"``; ``json.dumps``
    rendered it as the JS string literal ``'"{}"'`` instead of an empty
    configuration object. Use an empty dict so the ``__RTC_CONFIGURATION__``
    placeholder becomes a real JSON object.
    """
    rtc_config = get_twilio_turn_credentials() if get_space() else {}
    html_content = (curr_dir / "index.html").read_text()
    html_content = html_content.replace(
        "__RTC_CONFIGURATION__", json.dumps(rtc_config)
    )
    return HTMLResponse(content=html_content, status_code=200)
94
+
95
+
96
@stream.post("/input_hook")
async def _(body: InputData):
    """Receive the client's chat history and hand it to the stream handler."""
    # Equivalent to body.model_dump()["chatbot"]: a list of plain dicts.
    history = [message.model_dump() for message in body.chatbot]
    stream.set_input(body.webrtc_id, history)
    return {"status": "ok"}
100
+
101
+
102
@stream.get("/outputs")
def _(webrtc_id: str):
    """SSE endpoint streaming chat-history updates for one connection.

    Each handler pass appends a user and an assistant message; the two
    newest messages are forwarded as individual ``output`` events.
    """
    print("outputs", webrtc_id)

    async def output_stream():
        async for output in stream.output_stream(webrtc_id):
            chatbot = output.args[0]
            # Fix: the old chatbot[-2]/chatbot[-1] pair raised IndexError
            # when fewer than two messages existed; the slice is safe.
            for message in chatbot[-2:]:
                yield f"event: output\ndata: {json.dumps(message)}\n\n"

    return StreamingResponse(output_stream(), media_type="text/event-stream")
113
+
114
+
115
if __name__ == "__main__":
    import uvicorn

    # uvicorn.run blocks until shutdown and returns None, so there is
    # nothing useful to assign (the old `s = ...` binding was dead).
    uvicorn.run(stream, port=7860, host="0.0.0.0")
index.html ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>WebRTC vs WebSocket Benchmark</title>
8
+ <script src="https://cdn.jsdelivr.net/npm/alawmulaw"></script>
9
+ <style>
10
+ body {
11
+ font-family: system-ui, -apple-system, sans-serif;
12
+ margin: 0;
13
+ padding: 20px;
14
+ background-color: #f5f5f5;
15
+ }
16
+
17
+ .container {
18
+ display: grid;
19
+ grid-template-columns: 1fr 1fr;
20
+ gap: 30px;
21
+ max-width: 1400px;
22
+ margin: 0 auto;
23
+ }
24
+
25
+ .panel {
26
+ background: white;
27
+ border-radius: 12px;
28
+ padding: 20px;
29
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
30
+ }
31
+
32
+ .chat-container {
33
+ height: 400px;
34
+ overflow-y: auto;
35
+ border: 1px solid #e0e0e0;
36
+ border-radius: 8px;
37
+ padding: 15px;
38
+ margin-bottom: 15px;
39
+ }
40
+
41
+ .message {
42
+ margin-bottom: 10px;
43
+ padding: 8px 12px;
44
+ border-radius: 8px;
45
+ max-width: 80%;
46
+ }
47
+
48
+ .message.user {
49
+ background-color: #e3f2fd;
50
+ margin-left: auto;
51
+ }
52
+
53
+ .message.assistant {
54
+ background-color: #f5f5f5;
55
+ }
56
+
57
+ .metrics {
58
+ margin-top: 15px;
59
+ padding: 10px;
60
+ background: #f8f9fa;
61
+ border-radius: 8px;
62
+ }
63
+
64
+ .metric {
65
+ margin: 5px 0;
66
+ font-size: 14px;
67
+ }
68
+
69
+ button {
70
+ background-color: #1976d2;
71
+ color: white;
72
+ border: none;
73
+ padding: 10px 20px;
74
+ border-radius: 6px;
75
+ cursor: pointer;
76
+ font-size: 14px;
77
+ transition: background-color 0.2s;
78
+ }
79
+
80
+ button:hover {
81
+ background-color: #1565c0;
82
+ }
83
+
84
+ button:disabled {
85
+ background-color: #bdbdbd;
86
+ cursor: not-allowed;
87
+ }
88
+
89
+ h2 {
90
+ margin-top: 0;
91
+ color: #1976d2;
92
+ }
93
+
94
+ .visualizer {
95
+ width: 100%;
96
+ height: 100px;
97
+ margin: 10px 0;
98
+ background: #fafafa;
99
+ border-radius: 8px;
100
+ }
101
+
102
+ /* Add styles for disclaimer */
103
+ .disclaimer {
104
+ background-color: #fff3e0;
105
+ padding: 15px;
106
+ border-radius: 8px;
107
+ margin-bottom: 20px;
108
+ font-size: 14px;
109
+ line-height: 1.5;
110
+ max-width: 1400px;
111
+ margin: 0 auto 20px auto;
112
+ }
113
+
114
+ /* Update nav bar styles */
115
+ .nav-bar {
116
+ background-color: #f5f5f5;
117
+ padding: 10px 20px;
118
+ margin-bottom: 20px;
119
+ }
120
+
121
+ .nav-container {
122
+ max-width: 1400px;
123
+ margin: 0 auto;
124
+ display: flex;
125
+ gap: 10px;
126
+ }
127
+
128
+ .nav-button {
129
+ background-color: #1976d2;
130
+ color: white;
131
+ border: none;
132
+ padding: 8px 16px;
133
+ border-radius: 4px;
134
+ cursor: pointer;
135
+ text-decoration: none;
136
+ font-size: 14px;
137
+ transition: background-color 0.2s;
138
+ }
139
+
140
+ .nav-button:hover {
141
+ background-color: #1565c0;
142
+ }
143
+ </style>
144
+ </head>
145
+
146
+ <body>
147
+ <nav class="nav-bar">
148
+ <div class="nav-container">
149
+ <a href="/webrtc/docs" class="nav-button">WebRTC Docs</a>
150
+ <a href="/websocket/docs" class="nav-button">WebSocket Docs</a>
151
+ <a href="/telephone/docs" class="nav-button">Telephone Docs</a>
152
+ <a href="/ui" class="nav-button">UI</a>
153
+ </div>
154
+ </nav>
155
+
156
+ <div class="disclaimer">
157
+ This page compares the WebRTC Round-Trip-Time calculated from <code>getStats()</code> to the time taken to
158
+ process a ping/pong response pattern over websockets. It may not be a gold standard benchmark. Both WebRTC and
159
+ Websockets have their merits/advantages which is why FastRTC supports both. Artifacts in the WebSocket playback
160
+ audio are due to gaps in my frontend processing code and not the FastRTC web server.
161
+ </div>
162
+
163
+ <div class="container">
164
+ <div class="panel">
165
+ <h2>WebRTC Connection</h2>
166
+ <div id="webrtc-chat" class="chat-container"></div>
167
+ <div id="webrtc-metrics" class="metrics">
168
+ <div class="metric">RTT (Round Trip Time): <span id="webrtc-rtt">-</span></div>
169
+ </div>
170
+ <button id="webrtc-button">Connect WebRTC</button>
171
+ </div>
172
+
173
+ <div class="panel">
174
+ <h2>WebSocket Connection</h2>
175
+ <div id="ws-chat" class="chat-container"></div>
176
+ <div id="ws-metrics" class="metrics">
177
+ <div class="metric">RTT (Round Trip Time): <span id="ws-rtt">0</span></div>
178
+ </div>
179
+ <button id="ws-button">Connect WebSocket</button>
180
+ </div>
181
+ </div>
182
+
183
+ <audio id="webrtc-audio" style="display: none;"></audio>
184
+ <audio id="ws-audio" style="display: none;"></audio>
185
+
186
+ <script>
187
+ // Shared utilities
188
// Short pseudo-random id used to correlate a connection with its SSE stream.
function generateId() {
    const token = Math.random().toString(36);
    return token.substring(7);
}
191
+
192
// Build a data-channel message handler that, when the server requests
// "send_input", POSTs the current WebRTC chat history to /input_hook.
function sendInput(id) {
    return function handleMessage(event) {
        const msg = JSON.parse(event.data);
        if (msg.type !== "send_input") {
            return;
        }
        const payload = JSON.stringify({
            webrtc_id: id,
            chatbot: chatHistoryWebRTC
        });
        fetch('/input_hook', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: payload
        });
    }
}
210
+
211
+ let chatHistoryWebRTC = [];
212
+ let chatHistoryWebSocket = [];
213
+
214
// Append a chat bubble to the given panel and record it in the matching
// per-transport history array.
function addMessage(containerId, role, content) {
    const container = document.getElementById(containerId);
    const bubble = document.createElement('div');
    bubble.classList.add('message', role);
    bubble.textContent = content;
    container.appendChild(bubble);
    // Keep the newest message in view.
    container.scrollTop = container.scrollHeight;
    const history = (containerId === 'webrtc-chat')
        ? chatHistoryWebRTC
        : chatHistoryWebSocket;
    history.push({ role, content });
}
227
+
228
+ // WebRTC Implementation
229
+ let webrtcPeerConnection;
230
+
231
+ // Add this function to collect RTT stats
232
// Poll getStats() and surface the active candidate pair's RTT in the UI.
async function updateWebRTCStats() {
    if (!webrtcPeerConnection) return;

    const stats = await webrtcPeerConnection.getStats();
    for (const report of stats.values()) {
        if (report.type === 'candidate-pair' && report.state === 'succeeded') {
            const rttMs = report.currentRoundTripTime * 1000; // seconds -> ms
            document.getElementById('webrtc-rtt').textContent = `${rttMs.toFixed(2)}ms`;
        }
    }
}
243
+
244
// Open a WebRTC session: mic capture, remote audio playback, a data
// channel for server "send_input" requests, an SSE stream for chat
// updates, and a 1 s getStats() RTT poll.
async function setupWebRTC() {
    const button = document.getElementById('webrtc-button');
    button.textContent = "Stop";

    const config = __RTC_CONFIGURATION__;
    webrtcPeerConnection = new RTCPeerConnection(config);
    const webrtcId = generateId();

    try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        stream.getTracks().forEach(track => {
            webrtcPeerConnection.addTrack(track, stream);
        });

        webrtcPeerConnection.addEventListener('track', (evt) => {
            const audio = document.getElementById('webrtc-audio');
            if (audio.srcObject !== evt.streams[0]) {
                audio.srcObject = evt.streams[0];
                audio.play();
            }
        });

        const dataChannel = webrtcPeerConnection.createDataChannel('text');
        dataChannel.onmessage = sendInput(webrtcId);

        const offer = await webrtcPeerConnection.createOffer();
        await webrtcPeerConnection.setLocalDescription(offer);

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: offer.sdp,
                type: offer.type,
                webrtc_id: webrtcId
            })
        });

        const serverResponse = await response.json();
        await webrtcPeerConnection.setRemoteDescription(serverResponse);

        // Setup event source for chat-history messages
        const eventSource = new EventSource('/outputs?webrtc_id=' + webrtcId);
        eventSource.addEventListener("output", (event) => {
            const eventJson = JSON.parse(event.data);
            addMessage('webrtc-chat', eventJson.role, eventJson.content);
        });

        // Periodic stats collection; stored on the connection so
        // webrtc_stop() can clear it.
        webrtcPeerConnection.statsInterval = setInterval(updateWebRTCStats, 1000);

    } catch (err) {
        console.error('WebRTC setup error:', err);
        // Fix: tear down the half-built connection and restore the label;
        // previously the button stayed on "Stop" so the next click ran the
        // stop path instead of retrying the connection.
        webrtc_stop();
        button.textContent = 'Connect WebRTC';
    }
}
302
+
303
// Tear down the WebRTC session: stop the stats poll, release the mic
// tracks, close the connection, and reset the RTT readout.
function webrtc_stop() {
    if (!webrtcPeerConnection) {
        return;
    }

    if (webrtcPeerConnection.statsInterval) {
        clearInterval(webrtcPeerConnection.statsInterval);
    }

    for (const sender of webrtcPeerConnection.getSenders()) {
        if (sender.track) {
            sender.track.stop();
        }
    }

    webrtcPeerConnection.close();
    webrtcPeerConnection = null;

    document.getElementById('webrtc-rtt').textContent = '-';
}
324
+
325
+ // WebSocket Implementation
326
+ let webSocket;
327
+ let wsMetrics = {
328
+ pingStartTime: 0,
329
+ rttValues: []
330
+ };
331
+
332
+ // Load mu-law library
333
+
334
+ // Add load promise to track when the script is ready
335
+
336
+
337
// Linearly interpolate audioData from one sample rate to another.
function resample(audioData, fromSampleRate, toSampleRate) {
    const step = fromSampleRate / toSampleRate;
    const outLength = Math.round(audioData.length / step);
    const out = new Float32Array(outLength);

    for (let i = 0; i < outLength; i++) {
        const pos = i * step;
        const lo = Math.floor(pos);
        const frac = pos - lo;
        // Interpolate between neighbours; hold the final sample at the edge.
        out[i] = (lo + 1 < audioData.length)
            ? audioData[lo] * (1 - frac) + audioData[lo + 1] * frac
            : audioData[lo];
    }
    return out;
}
355
+
356
// Convert float32 PCM at `sampleRate` into 8 kHz mu-law bytes.
function convertToMulaw(audioData, sampleRate) {
    // Resample to 8000 Hz if needed
    if (sampleRate !== 8000) {
        audioData = resample(audioData, sampleRate, 8000);
    }

    // Convert float32 [-1,1] to int16. Fix: the old code stored
    // Math.floor(x * 32768) directly, which overflows Int16 at x === 1.0
    // (32768 wraps to -32768, a full-scale click); clamp to the valid range.
    const int16Data = new Int16Array(audioData.length);
    for (let i = 0; i < audioData.length; i++) {
        const sample = Math.floor(audioData[i] * 32768);
        int16Data[i] = Math.max(-32768, Math.min(32767, sample));
    }

    // Convert to mu-law using the alawmulaw library
    return alawmulaw.mulaw.encode(int16Data);
}
371
+
372
// Open the WebSocket benchmark session: stream mic audio to the server as
// base64 mu-law "media" events, play returned audio, ping every 500 ms to
// measure application-level RTT, and subscribe to chat updates over SSE.
async function setupWebSocket() {
    const button = document.getElementById('ws-button');
    button.textContent = "Stop";

    try {
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: {
                "echoCancellation": true,
                "noiseSuppression": { "exact": true },
                "autoGainControl": { "exact": true },
                "sampleRate": { "ideal": 24000 },
                "sampleSize": { "ideal": 16 },
                "channelCount": { "exact": 1 },
            }
        });
        const wsId = generateId();
        wsMetrics.startTime = performance.now();

        // Create audio context and analyser for visualization
        const audioContext = new AudioContext();
        const analyser = audioContext.createAnalyser();
        const source = audioContext.createMediaStreamSource(stream);
        source.connect(analyser);

        // Connect to websocket endpoint
        webSocket = new WebSocket(`${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/websocket/offer`);

        webSocket.onopen = () => {
            // Send initial start message
            webSocket.send(JSON.stringify({
                event: "start",
                websocket_id: wsId
            }));

            // Setup audio processing
            const processor = audioContext.createScriptProcessor(2048, 1, 1);
            source.connect(processor);
            processor.connect(audioContext.destination);

            processor.onaudioprocess = (e) => {
                const inputData = e.inputBuffer.getChannelData(0);
                const mulawData = convertToMulaw(inputData, audioContext.sampleRate);
                const base64Audio = btoa(String.fromCharCode.apply(null, mulawData));
                if (webSocket.readyState === WebSocket.OPEN) {
                    webSocket.send(JSON.stringify({
                        event: "media",
                        media: {
                            payload: base64Audio
                        }
                    }));
                }
            };

            // Ping every 500 ms to measure application-level RTT
            webSocket.pingInterval = setInterval(() => {
                wsMetrics.pingStartTime = performance.now();
                webSocket.send(JSON.stringify({
                    event: "ping"
                }));
            }, 500);
        };

        // Setup audio output context
        const outputContext = new AudioContext({ sampleRate: 24000 });
        const sampleRate = 24000; // Updated to match server sample rate
        let audioQueue = [];
        let isPlaying = false;

        webSocket.onmessage = (event) => {
            const data = JSON.parse(event.data);
            if (data?.type === "send_input") {
                console.log("sending input")
                fetch('/input_hook', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ webrtc_id: wsId, chatbot: chatHistoryWebSocket })
                });
            }
            if (data.event === "media") {
                // Process received audio
                const audioData = atob(data.media.payload);
                const mulawData = new Uint8Array(audioData.length);
                for (let i = 0; i < audioData.length; i++) {
                    mulawData[i] = audioData.charCodeAt(i);
                }

                // Convert mu-law to linear PCM
                const linearData = alawmulaw.mulaw.decode(mulawData);

                // Create an AudioBuffer
                const audioBuffer = outputContext.createBuffer(1, linearData.length, sampleRate);
                const channelData = audioBuffer.getChannelData(0);

                // Fill the buffer with the decoded data
                for (let i = 0; i < linearData.length; i++) {
                    channelData[i] = linearData[i] / 32768.0;
                }

                // Queue the audio buffer
                audioQueue.push(audioBuffer);

                // Start playing if not already playing
                if (!isPlaying) {
                    playNextBuffer();
                }
            }

            // Pong handler: fold the measured RTT into a 20-sample running mean
            if (data.event === "pong") {
                const rtt = performance.now() - wsMetrics.pingStartTime;
                wsMetrics.rttValues.push(rtt);
                if (wsMetrics.rttValues.length > 20) {
                    wsMetrics.rttValues.shift();
                }
                const avgRtt = wsMetrics.rttValues.reduce((a, b) => a + b, 0) / wsMetrics.rttValues.length;
                document.getElementById('ws-rtt').textContent = `${avgRtt.toFixed(2)}ms`;
                return;
            }
        };

        // Drain the playback queue one AudioBuffer at a time.
        function playNextBuffer() {
            if (audioQueue.length === 0) {
                isPlaying = false;
                return;
            }

            isPlaying = true;
            const bufferSource = outputContext.createBufferSource();
            bufferSource.buffer = audioQueue.shift();
            bufferSource.connect(outputContext.destination);

            bufferSource.onended = playNextBuffer;
            bufferSource.start();
        }

        const eventSource = new EventSource('/outputs?webrtc_id=' + wsId);
        eventSource.addEventListener("output", (event) => {
            console.log("ws output", event);
            const eventJson = JSON.parse(event.data);
            addMessage('ws-chat', eventJson.role, eventJson.content);
        });

    } catch (err) {
        console.error('WebSocket setup error:', err);
        // Fix: the button is never disabled, so `button.disabled = false`
        // was a no-op; reset the label instead so the next click retries
        // setup rather than running the stop path on a dead socket.
        button.textContent = 'Connect WebSocket';
    }
}
520
+
521
// Stop the WebSocket session. Fix: only send the "stop" message when the
// socket is actually OPEN — send() on a CONNECTING socket throws, which
// aborted the rest of the cleanup.
function ws_stop() {
    if (!webSocket) {
        return;
    }
    if (webSocket.readyState === WebSocket.OPEN) {
        webSocket.send(JSON.stringify({
            event: "stop"
        }));
    }
    // Clear ping interval
    if (webSocket.pingInterval) {
        clearInterval(webSocket.pingInterval);
    }
    // Reset RTT display and history
    document.getElementById('ws-rtt').textContent = '-';
    wsMetrics.rttValues = [];

    // Clear the stats interval (defensive; only WebRTC sets statsInterval)
    if (webSocket.statsInterval) {
        clearInterval(webSocket.statsInterval);
    }
    webSocket.close();
}
541
+
542
// Event Listeners: wire the connect/stop toggles and page-unload cleanup.
document.getElementById('webrtc-button').addEventListener('click', () => {
    const button = document.getElementById('webrtc-button');
    if (button.textContent === 'Connect WebRTC') {
        setupWebRTC();
    } else {
        webrtc_stop();
        button.textContent = 'Connect WebRTC';
    }
});
const ws_start_button = document.getElementById('ws-button')
ws_start_button.addEventListener('click', () => {
    if (ws_start_button.textContent === 'Connect WebSocket') {
        setupWebSocket();
        ws_start_button.textContent = 'Stop';
    } else {
        ws_stop();
        ws_start_button.textContent = 'Connect WebSocket';
    }
});
// Fix: "beforeunload" fires on window, not document — the original
// document.addEventListener registration never ran, so connections were
// not cleaned up on page close.
window.addEventListener("beforeunload", () => {
    ws_stop();
    webrtc_stop();
});
566
+ </script>
567
+ </body>
568
+
569
+ </html>
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastrtc
2
+ elevenlabs
3
+ groq
4
+ anthropic
5
+ twilio
6
+ python-dotenv