Sofia Casadei committed on
Commit
d4e500e
·
1 Parent(s): 41f3522
Files changed (3) hide show
  1. requirements.txt +7 -7
  2. static/client.js +0 -127
  3. static/index.html +399 -28
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- accelerate
2
- fastrtc
3
- fastrtc[vad]
4
- python-dotenv
5
- transformers
6
- torch
7
- torchaudio
8
  fastapi
9
  uvicorn[standard]
 
1
+ accelerate==1.4.0
2
+ fastrtc==0.0.15
3
+ fastrtc[vad]==0.0.15
4
+ python-dotenv==1.0.1
5
+ transformers==4.49.0
6
+ torch==2.6.0
7
+ torchaudio==2.6.0
8
  fastapi
9
  uvicorn[standard]
static/client.js DELETED
@@ -1,127 +0,0 @@
1
- // Global variables
2
- let peerConnection = null;
3
- let dataChannel = null;
4
- let webrtcId = null;
5
-
6
- // Helper function to generate unique ID
7
- function generateUniqueId() {
8
- return Math.random().toString(36).substring(7);
9
- }
10
-
11
- // Update UI status
12
- function updateStatus(connected) {
13
- const statusDiv = document.getElementById('status');
14
- const connectBtn = document.getElementById('connectBtn');
15
- const disconnectBtn = document.getElementById('disconnectBtn');
16
-
17
- statusDiv.textContent = connected ? 'Connected' : 'Disconnected';
18
- statusDiv.className = connected ? 'connected' : 'disconnected';
19
- connectBtn.disabled = connected;
20
- disconnectBtn.disabled = !connected;
21
- }
22
-
23
- // Setup WebRTC connection
24
- async function setupWebRTC() {
25
- try {
26
- // Create peer connection
27
- peerConnection = new RTCPeerConnection();
28
- webrtcId = generateUniqueId();
29
-
30
- // Get audio stream from microphone
31
- const stream = await navigator.mediaDevices.getUserMedia({
32
- audio: true
33
- });
34
-
35
- // Add audio stream to peer connection
36
- stream.getTracks().forEach(track => {
37
- peerConnection.addTrack(track, stream);
38
- });
39
-
40
- // Create data channel
41
- dataChannel = peerConnection.createDataChannel("text");
42
-
43
- // Handle data channel messages
44
- dataChannel.onmessage = (event) => {
45
- const message = JSON.parse(event.data);
46
- console.log("Received message:", message);
47
-
48
- // Handle different message types
49
- switch(message.type) {
50
- case 'log':
51
- console.log("Server log:", message.data);
52
- break;
53
- case 'error':
54
- console.error("Server error:", message.data);
55
- break;
56
- case 'warning':
57
- console.warn("Server warning:", message.data);
58
- break;
59
- }
60
- };
61
-
62
- // Create and send offer
63
- const offer = await peerConnection.createOffer();
64
- await peerConnection.setLocalDescription(offer);
65
-
66
- // Send offer to server
67
- const response = await fetch('/webrtc/offer', {
68
- method: 'POST',
69
- headers: { 'Content-Type': 'application/json' },
70
- body: JSON.stringify({
71
- sdp: offer.sdp,
72
- type: offer.type,
73
- webrtc_id: webrtcId
74
- })
75
- });
76
-
77
- if (!response.ok) {
78
- throw new Error(`HTTP error! status: ${response.status}`);
79
- }
80
-
81
- // Handle server response
82
- const serverResponse = await response.json();
83
-
84
- // Check for error response
85
- if (serverResponse.status === 'failed') {
86
- throw new Error(serverResponse.meta.error);
87
- }
88
-
89
- // Set remote description
90
- await peerConnection.setRemoteDescription(serverResponse);
91
-
92
- // Update UI
93
- updateStatus(true);
94
-
95
- // Add to setupWebRTC():
96
- const eventSource = new EventSource(`/transcript?webrtc_id=${webrtcId}`);
97
-
98
- eventSource.onmessage = (event) => {
99
- const transcriptDiv = document.getElementById('transcript');
100
- transcriptDiv.innerHTML += `<p>${event.data}</p>`;
101
- };
102
-
103
- } catch (error) {
104
- console.error("Error setting up WebRTC:", error);
105
- updateStatus(false);
106
- }
107
- }
108
-
109
- // Cleanup function
110
- function disconnect() {
111
- if (peerConnection) {
112
- peerConnection.close();
113
- peerConnection = null;
114
- }
115
- if (dataChannel) {
116
- dataChannel.close();
117
- dataChannel = null;
118
- }
119
- webrtcId = null;
120
- updateStatus(false);
121
- }
122
-
123
- // Add event listeners when page loads
124
- document.addEventListener('DOMContentLoaded', () => {
125
- document.getElementById('connectBtn').addEventListener('click', setupWebRTC);
126
- document.getElementById('disconnectBtn').addEventListener('click', disconnect);
127
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/index.html CHANGED
@@ -1,53 +1,424 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>FastRTC Audio Client</title>
7
  <style>
 
 
 
 
 
 
8
  body {
9
- font-family: Arial, sans-serif;
10
- max-width: 800px;
11
- margin: 0 auto;
12
- padding: 20px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
 
 
 
 
 
 
 
 
 
 
 
14
  .controls {
15
- margin: 20px 0;
 
16
  }
 
17
  button {
 
 
 
18
  padding: 10px 20px;
19
- margin: 5px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
21
- #status {
22
- margin: 10px 0;
23
- padding: 10px;
 
 
 
24
  border-radius: 4px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
- .connected {
27
- background-color: #d4edda;
28
- color: #155724;
 
 
 
 
 
 
 
29
  }
30
- .disconnected {
31
- background-color: #f8d7da;
32
- color: #721c24;
 
 
33
  }
34
  </style>
35
  </head>
 
36
  <body>
37
- <h1>FastRTC Audio Client</h1>
38
- <div id="status" class="disconnected">Disconnected</div>
39
-
40
- <div class="controls">
41
- <button id="connectBtn">Connect</button>
42
- <button id="disconnectBtn" disabled>Disconnect</button>
43
  </div>
44
 
45
- <!-- Audio element for playback -->
46
- <audio id="audioOutput" autoplay></audio>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- <div id="transcript" style="margin-top: 20px; padding: 10px; border: 1px solid #ccc;"></div>
 
 
 
49
 
50
- <!-- Load our WebRTC client code -->
51
- <script src="/static/client.js"></script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  </body>
53
- </html>
 
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
+
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Real-time Whisper Transcription</title>
8
  <style>
9
+ :root {
10
+ --primary-gradient: linear-gradient(135deg, #f9a45c 0%, #e66465 100%);
11
+ --background-cream: #faf8f5;
12
+ --text-dark: #2d2d2d;
13
+ }
14
+
15
  body {
16
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
17
+ margin: 0;
18
+ padding: 0;
19
+ background-color: var(--background-cream);
20
+ color: var(--text-dark);
21
+ min-height: 100vh;
22
+ }
23
+
24
+ .hero {
25
+ background: var(--primary-gradient);
26
+ color: white;
27
+ padding: 2.5rem 2rem;
28
+ text-align: center;
29
+ }
30
+
31
+ .hero h1 {
32
+ font-size: 2.5rem;
33
+ margin: 0;
34
+ font-weight: 600;
35
+ letter-spacing: -0.5px;
36
+ }
37
+
38
+ .hero p {
39
+ font-size: 1rem;
40
+ margin-top: 0.5rem;
41
+ opacity: 0.9;
42
+ }
43
+
44
+ .container {
45
+ max-width: 1000px;
46
+ margin: 1.5rem auto;
47
+ padding: 0 2rem;
48
  }
49
+
50
+ .transcript-container {
51
+ border-radius: 8px;
52
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
53
+ padding: 1.5rem;
54
+ height: 300px;
55
+ overflow-y: auto;
56
+ margin-bottom: 1.5rem;
57
+ border: 1px solid rgba(0, 0, 0, 0.1);
58
+ }
59
+
60
  .controls {
61
+ text-align: center;
62
+ margin: 1.5rem 0;
63
  }
64
+
65
  button {
66
+ background: var(--primary-gradient);
67
+ color: white;
68
+ border: none;
69
  padding: 10px 20px;
70
+ font-size: 0.95rem;
71
+ border-radius: 6px;
72
+ cursor: pointer;
73
+ transition: all 0.2s ease;
74
+ font-weight: 500;
75
+ min-width: 180px;
76
+ }
77
+
78
+ button:hover {
79
+ transform: translateY(-1px);
80
+ box-shadow: 0 4px 12px rgba(230, 100, 101, 0.15);
81
+ }
82
+
83
+ button:active {
84
+ transform: translateY(0);
85
  }
86
+
87
+ /* Transcript text styling */
88
+ .transcript-container p {
89
+ margin: 0.4rem 0;
90
+ padding: 0.6rem;
91
+ background: var(--background-cream);
92
  border-radius: 4px;
93
+ line-height: 1.4;
94
+ font-size: 0.95rem;
95
+ }
96
+
97
+ /* Custom scrollbar - made thinner */
98
+ .transcript-container::-webkit-scrollbar {
99
+ width: 6px;
100
+ }
101
+
102
+ .transcript-container::-webkit-scrollbar-track {
103
+ background: var(--background-cream);
104
+ border-radius: 3px;
105
+ }
106
+
107
+ .transcript-container::-webkit-scrollbar-thumb {
108
+ background: #e66465;
109
+ border-radius: 3px;
110
+ opacity: 0.8;
111
+ }
112
+
113
+ .transcript-container::-webkit-scrollbar-thumb:hover {
114
+ background: #f9a45c;
115
+ }
116
+
117
+ /* Add styles for toast notifications */
118
+ .toast {
119
+ position: fixed;
120
+ top: 20px;
121
+ left: 50%;
122
+ transform: translateX(-50%);
123
+ padding: 16px 24px;
124
+ border-radius: 4px;
125
+ font-size: 14px;
126
+ z-index: 1000;
127
+ display: none;
128
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
129
+ }
130
+
131
+ .toast.error {
132
+ background-color: #f44336;
133
+ color: white;
134
+ }
135
+
136
+ .toast.warning {
137
+ background-color: #ffd700;
138
+ color: black;
139
+ }
140
+
141
+ /* Add styles for audio visualization */
142
+ .icon-with-spinner {
143
+ display: flex;
144
+ align-items: center;
145
+ justify-content: center;
146
+ gap: 12px;
147
+ min-width: 180px;
148
+ }
149
+
150
+ .spinner {
151
+ width: 20px;
152
+ height: 20px;
153
+ border: 2px solid white;
154
+ border-top-color: transparent;
155
+ border-radius: 50%;
156
+ animation: spin 1s linear infinite;
157
+ flex-shrink: 0;
158
+ }
159
+
160
+ .pulse-container {
161
+ display: flex;
162
+ align-items: center;
163
+ justify-content: center;
164
+ gap: 12px;
165
+ min-width: 180px;
166
  }
167
+
168
+ .pulse-circle {
169
+ width: 20px;
170
+ height: 20px;
171
+ border-radius: 50%;
172
+ background-color: white;
173
+ opacity: 0.2;
174
+ flex-shrink: 0;
175
+ transform: translateX(-0%) scale(var(--audio-level, 1));
176
+ transition: transform 0.1s ease;
177
  }
178
+
179
+ @keyframes spin {
180
+ to {
181
+ transform: rotate(360deg);
182
+ }
183
  }
184
  </style>
185
  </head>
186
+
187
  <body>
188
+ <!-- Add toast element after body opening tag -->
189
+ <div id="error-toast" class="toast"></div>
190
+ <div class="hero">
191
+ <h1>PyCon Italia 2025 Real-time Transcription</h1>
192
+ <p>Powered by FastRTC and Local Whisper</p>
 
193
  </div>
194
 
195
+ <div class="container">
196
+ <div class="transcript-container" id="transcript"></div>
197
+ <div class="controls">
198
+ <button id="start-button">Start Recording</button>
199
+ </div>
200
+ </div>
201
+
202
+ <script>
203
+ let peerConnection;
204
+ let webrtc_id;
205
+ let audioContext, analyser, audioSource;
206
+ let audioLevel = 0;
207
+ let animationFrame;
208
+
209
+ const startButton = document.getElementById('start-button');
210
+ const transcriptDiv = document.getElementById('transcript');
211
+
212
+ function showError(message) {
213
+ const toast = document.getElementById('error-toast');
214
+ toast.textContent = message;
215
+ toast.style.display = 'block';
216
+
217
+ // Hide toast after 5 seconds
218
+ setTimeout(() => {
219
+ toast.style.display = 'none';
220
+ }, 5000);
221
+ }
222
+
223
+ function handleMessage(event) {
224
+ // Handle any WebRTC data channel messages if needed
225
+ const eventJson = JSON.parse(event.data);
226
+ if (eventJson.type === "error") {
227
+ showError(eventJson.message);
228
+ }
229
+ console.log('Received message:', event.data);
230
+ }
231
+
232
+ function updateButtonState() {
233
+ if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
234
+ startButton.innerHTML = `
235
+ <div class="icon-with-spinner">
236
+ <div class="spinner"></div>
237
+ <span>Connecting...</span>
238
+ </div>
239
+ `;
240
+ } else if (peerConnection && peerConnection.connectionState === 'connected') {
241
+ startButton.innerHTML = `
242
+ <div class="pulse-container">
243
+ <div class="pulse-circle"></div>
244
+ <span>Stop Recording</span>
245
+ </div>
246
+ `;
247
+ } else {
248
+ startButton.innerHTML = 'Start Recording';
249
+ }
250
+ }
251
+
252
+ function setupAudioVisualization(stream) {
253
+ audioContext = new (window.AudioContext || window.webkitAudioContext)();
254
+ analyser = audioContext.createAnalyser();
255
+ audioSource = audioContext.createMediaStreamSource(stream);
256
+ audioSource.connect(analyser);
257
+ analyser.fftSize = 64;
258
+ const dataArray = new Uint8Array(analyser.frequencyBinCount);
259
+
260
+ function updateAudioLevel() {
261
+ analyser.getByteFrequencyData(dataArray);
262
+ const average = Array.from(dataArray).reduce((a, b) => a + b, 0) / dataArray.length;
263
+ audioLevel = average / 255;
264
+
265
+ const pulseCircle = document.querySelector('.pulse-circle');
266
+ if (pulseCircle) {
267
+ pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
268
+ }
269
+
270
+ animationFrame = requestAnimationFrame(updateAudioLevel);
271
+ }
272
+ updateAudioLevel();
273
+ }
274
+
275
+ async function setupWebRTC() {
276
+ const config = __RTC_CONFIGURATION__;
277
+ peerConnection = new RTCPeerConnection(config);
278
+
279
+ const timeoutId = setTimeout(() => {
280
+ const toast = document.getElementById('error-toast');
281
+ toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
282
+ toast.className = 'toast warning';
283
+ toast.style.display = 'block';
284
+
285
+ // Hide warning after 5 seconds
286
+ setTimeout(() => {
287
+ toast.style.display = 'none';
288
+ }, 5000);
289
+ }, 5000);
290
 
291
+ try {
292
+ const stream = await navigator.mediaDevices.getUserMedia({
293
+ audio: true
294
+ });
295
 
296
+ setupAudioVisualization(stream);
297
+
298
+ stream.getTracks().forEach(track => {
299
+ peerConnection.addTrack(track, stream);
300
+ });
301
+
302
+ // Add connection state change listener
303
+ peerConnection.addEventListener('connectionstatechange', () => {
304
+ console.log('connectionstatechange', peerConnection.connectionState);
305
+ if (peerConnection.connectionState === 'connected') {
306
+ clearTimeout(timeoutId);
307
+ const toast = document.getElementById('error-toast');
308
+ toast.style.display = 'none';
309
+ }
310
+ updateButtonState();
311
+ });
312
+
313
+ // Create data channel for messages
314
+ const dataChannel = peerConnection.createDataChannel('text');
315
+ dataChannel.onmessage = handleMessage;
316
+
317
+ // Create and send offer
318
+ const offer = await peerConnection.createOffer();
319
+ await peerConnection.setLocalDescription(offer);
320
+
321
+ await new Promise((resolve) => {
322
+ if (peerConnection.iceGatheringState === "complete") {
323
+ resolve();
324
+ } else {
325
+ const checkState = () => {
326
+ if (peerConnection.iceGatheringState === "complete") {
327
+ peerConnection.removeEventListener("icegatheringstatechange", checkState);
328
+ resolve();
329
+ }
330
+ };
331
+ peerConnection.addEventListener("icegatheringstatechange", checkState);
332
+ }
333
+ });
334
+
335
+ webrtc_id = Math.random().toString(36).substring(7);
336
+
337
+ const response = await fetch('/webrtc/offer', {
338
+ method: 'POST',
339
+ headers: { 'Content-Type': 'application/json' },
340
+ body: JSON.stringify({
341
+ sdp: peerConnection.localDescription.sdp,
342
+ type: peerConnection.localDescription.type,
343
+ webrtc_id: webrtc_id
344
+ })
345
+ });
346
+
347
+ const serverResponse = await response.json();
348
+
349
+ if (serverResponse.status === 'failed') {
350
+ showError(serverResponse.meta.error === 'concurrency_limit_reached'
351
+ ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
352
+ : serverResponse.meta.error);
353
+ stop();
354
+ startButton.textContent = 'Start Recording';
355
+ return;
356
+ }
357
+
358
+ await peerConnection.setRemoteDescription(serverResponse);
359
+
360
+ // Create event stream to receive transcripts
361
+ const eventSource = new EventSource('/transcript?webrtc_id=' + webrtc_id);
362
+ eventSource.addEventListener("output", (event) => {
363
+ appendTranscript(event.data);
364
+ });
365
+ } catch (err) {
366
+ clearTimeout(timeoutId);
367
+ console.error('Error setting up WebRTC:', err);
368
+ showError('Failed to establish connection. Please try again.');
369
+ stop();
370
+ startButton.textContent = 'Start Recording';
371
+ }
372
+ }
373
+
374
+ function appendTranscript(text) {
375
+ const p = document.createElement('p');
376
+ p.textContent = text;
377
+ transcriptDiv.appendChild(p);
378
+ transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
379
+ }
380
+
381
+ function stop() {
382
+ if (animationFrame) {
383
+ cancelAnimationFrame(animationFrame);
384
+ }
385
+ if (audioContext) {
386
+ audioContext.close();
387
+ audioContext = null;
388
+ analyser = null;
389
+ audioSource = null;
390
+ }
391
+ if (peerConnection) {
392
+ if (peerConnection.getTransceivers) {
393
+ peerConnection.getTransceivers().forEach(transceiver => {
394
+ if (transceiver.stop) {
395
+ transceiver.stop();
396
+ }
397
+ });
398
+ }
399
+
400
+ if (peerConnection.getSenders) {
401
+ peerConnection.getSenders().forEach(sender => {
402
+ if (sender.track && sender.track.stop) sender.track.stop();
403
+ });
404
+ }
405
+
406
+ setTimeout(() => {
407
+ peerConnection.close();
408
+ }, 500);
409
+ }
410
+ audioLevel = 0;
411
+ updateButtonState();
412
+ }
413
+
414
+ startButton.addEventListener('click', () => {
415
+ if (startButton.textContent === 'Start Recording') {
416
+ setupWebRTC();
417
+ } else {
418
+ stop();
419
+ }
420
+ });
421
+ </script>
422
  </body>
423
+
424
+ </html>