bcci committed
Commit 589cb0a · verified · 1 Parent(s): de2f549

Update app.py

Files changed (1)
  1. app.py +119 -9
app.py CHANGED
@@ -87,31 +87,141 @@ async def websocket_endpoint(websocket: WebSocket):
 @app.get("/", response_class=HTMLResponse)
 async def get_home():
     return """
+    <!DOCTYPE html>
     <html>
-    <body>
-        <button onclick="startTranscription()">Start Transcription</button>
-        <select id="modelSelect" onchange="switchModel()">
+    <head>
+        <meta charset="UTF-8">
+        <title>AssemblyAI Realtime Transcription</title>
+        <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
+    </head>
+    <body class="bg-gray-100 p-6">
+    <div class="max-w-3xl mx-auto bg-white p-6 rounded-lg shadow-md">
+        <h1 class="text-2xl font-bold mb-4">Realtime Transcription</h1>
+        <button onclick="startTranscription()" class="bg-blue-500 text-white px-4 py-2 rounded mb-4">Start Transcription</button>
+        <select id="modelSelect" onchange="switchModel()" class="bg-gray-200 px-4 py-2 rounded mb-4">
             <option value="tiny">Tiny Model</option>
             <option value="base">Base Model</option>
         </select>
-        <p id="status">Click start to begin transcription.</p>
-        <div id="transcription"></div>
+        <p id="status" class="text-gray-600 mb-4">Click start to begin transcription.</p>
+        <p id="speakingStatus" class="text-gray-600 mb-4"></p>
+        <div id="transcription" class="border p-4 rounded mb-4 h-64 overflow-auto"></div>
+        <div id="visualizer" class="border p-4 rounded h-64">
+            <canvas id="audioCanvas" class="w-full h-full"></canvas>
+        </div>
+    </div>
     <script>
     let ws;
-    function startTranscription() {
+    let audioContext;
+    let scriptProcessor;
+    let mediaStream;
+    let currentLine = document.createElement('span');
+    let analyser;
+    let canvas, canvasContext;
+
+    document.getElementById('transcription').appendChild(currentLine);
+    canvas = document.getElementById('audioCanvas');
+    canvasContext = canvas.getContext('2d');
+
+    async function startTranscription() {
+        document.getElementById("status").innerText = "Connecting...";
         ws = new WebSocket("wss://" + location.host + "/ws/transcribe");
+        ws.binaryType = 'arraybuffer';
+
+        ws.onopen = async function() {
+            document.getElementById("status").innerText = "Connected";
+            try {
+                mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                audioContext = new AudioContext({ sampleRate: 16000 });
+                const source = audioContext.createMediaStreamSource(mediaStream);
+                analyser = audioContext.createAnalyser();
+                analyser.fftSize = 2048;
+                const bufferLength = analyser.frequencyBinCount;
+                const dataArray = new Uint8Array(bufferLength);
+                source.connect(analyser);
+                scriptProcessor = audioContext.createScriptProcessor(512, 1, 1);
+                scriptProcessor.onaudioprocess = function(event) {
+                    const inputData = event.inputBuffer.getChannelData(0);
+                    const pcm16 = floatTo16BitPCM(inputData);
+                    if (ws.readyState === WebSocket.OPEN) {
+                        ws.send(pcm16);
+                    }
+                    analyser.getByteTimeDomainData(dataArray);
+                    canvasContext.fillStyle = 'rgb(200, 200, 200)';
+                    canvasContext.fillRect(0, 0, canvas.width, canvas.height);
+                    canvasContext.lineWidth = 2;
+                    canvasContext.strokeStyle = 'rgb(0, 0, 0)';
+                    canvasContext.beginPath();
+                    let sliceWidth = canvas.width * 1.0 / bufferLength;
+                    let x = 0;
+                    for (let i = 0; i < bufferLength; i++) {
+                        let v = dataArray[i] / 128.0;
+                        let y = v * canvas.height / 2;
+                        if (i === 0) {
+                            canvasContext.moveTo(x, y);
+                        } else {
+                            canvasContext.lineTo(x, y);
+                        }
+                        x += sliceWidth;
+                    }
+                    canvasContext.lineTo(canvas.width, canvas.height / 2);
+                    canvasContext.stroke();
+                };
+                source.connect(scriptProcessor);
+                scriptProcessor.connect(audioContext.destination);
+            } catch (err) {
+                document.getElementById("status").innerText = "Error: " + err;
+            }
+        };
+
         ws.onmessage = function(event) {
             const data = JSON.parse(event.data);
-            if (data.type === 'final') {
-                document.getElementById("transcription").innerHTML += `<p>${data.transcript}</p>`;
+            if (data.type === 'partial') {
+                currentLine.style.color = 'gray';
+                currentLine.textContent = data.transcript + ' ';
+            } else if (data.type === 'final') {
+                currentLine.style.color = 'black';
+                currentLine.textContent = data.transcript;
+                currentLine = document.createElement('span');
+                document.getElementById('transcription').appendChild(document.createElement('br'));
+                document.getElementById('transcription').appendChild(currentLine);
+            } else if (data.type === 'status') {
+                if (data.message === 'speaking_started') {
+                    document.getElementById("speakingStatus").innerText = "Speaking Started";
+                    document.getElementById("speakingStatus").style.color = "green";
+                } else if (data.message === 'speaking_stopped') {
+                    document.getElementById("speakingStatus").innerText = "Speaking Stopped";
+                    document.getElementById("speakingStatus").style.color = "red";
+                }
             }
         };
+
+        ws.onclose = function() {
+            if (audioContext && audioContext.state !== 'closed') {
+                audioContext.close();
+            }
+            document.getElementById("status").innerText = "Closed";
+        };
     }
+
     function switchModel() {
         const model = document.getElementById("modelSelect").value;
         if (ws && ws.readyState === WebSocket.OPEN) {
-            ws.send(model === "tiny" ? "switch_to_tiny" : "switch_to_base");
+            if (model === "tiny") {
+                ws.send("switch_to_tiny");
+            } else if (model === "base") {
+                ws.send("switch_to_base");
+            }
+        }
+    }
+
+    function floatTo16BitPCM(input) {
+        const buffer = new ArrayBuffer(input.length * 2);
+        const output = new DataView(buffer);
+        for (let i = 0; i < input.length; i++) {
+            let s = Math.max(-1, Math.min(1, input[i]));
+            output.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
         }
+        return buffer;
     }
     </script>
     </body>
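
Note on the server side this page assumes: the diff only changes the HTML/JS returned by get_home(). The new client expects the /ws/transcribe endpoint (the websocket_endpoint handler visible in the hunk context) to accept raw little-endian 16-bit PCM frames at 16 kHz, to understand the "switch_to_tiny" / "switch_to_base" text commands, and to reply with JSON messages of type "partial", "final", or "status". That handler is not part of this commit; the sketch below is only a minimal illustration of that message contract, with the model registry and the transcribe_chunk helper invented for the example.

```python
# Hypothetical sketch of the /ws/transcribe handler this page talks to.
# Only the message shapes (binary PCM16 in, JSON out, "switch_to_*" text
# commands) come from the diff; model loading and inference are stubbed.
import json

import numpy as np
from fastapi import FastAPI, WebSocket

app = FastAPI()

models = {"tiny": None, "base": None}  # placeholder model registry


def transcribe_chunk(model, audio: np.ndarray) -> str:
    """Hypothetical helper; a real app would run its transcription model here."""
    return ""


@app.websocket("/ws/transcribe")
async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
    model_name = "tiny"
    while True:
        message = await websocket.receive()
        if message["type"] == "websocket.disconnect":
            break
        text = message.get("text")
        if text:
            # Control messages sent by switchModel() in the page.
            if text == "switch_to_tiny":
                model_name = "tiny"
            elif text == "switch_to_base":
                model_name = "base"
            continue
        chunk = message.get("bytes")
        if chunk:
            # floatTo16BitPCM() sends little-endian int16 samples at 16 kHz;
            # convert back to float32 in [-1, 1] before running the model.
            audio = np.frombuffer(chunk, dtype="<i2").astype(np.float32) / 32768.0
            transcript = transcribe_chunk(models[model_name], audio)
            await websocket.send_text(
                json.dumps({"type": "partial", "transcript": transcript})
            )
```

A real handler would also emit "final" transcripts and the "speaking_started" / "speaking_stopped" status messages the page renders, but those depend on the endpointing logic of whatever transcription backend the app actually uses.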