@app.get("/", response_class=HTMLResponse)
async def get_home():
    """Serve the single-page realtime-transcription UI.

    Returns:
        str: a complete HTML document (Tailwind-styled) containing the
        start button, model selector, transcript pane and waveform
        canvas, plus the client-side JavaScript that captures the
        microphone, downsamples to 16 kHz mono PCM16 and streams it
        over the ``/ws/transcribe`` WebSocket.  Incoming messages are
        JSON with ``type`` in {"partial", "final", "status"}.
    """
    return """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>AssemblyAI Realtime Transcription</title>
    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
</head>
<body class="bg-gray-100 p-6">
    <div class="max-w-3xl mx-auto bg-white p-6 rounded-lg shadow-md">
        <h1 class="text-2xl font-bold mb-4">Realtime Transcription</h1>
        <button onclick="startTranscription()" class="bg-blue-500 text-white px-4 py-2 rounded mb-4">Start Transcription</button>
        <select id="modelSelect" onchange="switchModel()" class="bg-gray-200 px-4 py-2 rounded mb-4">
            <option value="tiny">Tiny Model</option>
            <option value="base">Base Model</option>
        </select>
        <p id="status" class="text-gray-600 mb-4">Click start to begin transcription.</p>
        <p id="speakingStatus" class="text-gray-600 mb-4"></p>
        <div id="transcription" class="border p-4 rounded mb-4 h-64 overflow-auto"></div>
        <div id="visualizer" class="border p-4 rounded h-64">
            <canvas id="audioCanvas" class="w-full h-full"></canvas>
        </div>
    </div>
    <script>
        let ws;
        let audioContext;
        let scriptProcessor;
        let mediaStream;
        let currentLine = document.createElement('span');
        let analyser;
        let canvas, canvasContext;

        document.getElementById('transcription').appendChild(currentLine);
        canvas = document.getElementById('audioCanvas');
        canvasContext = canvas.getContext('2d');

        async function startTranscription() {
            document.getElementById("status").innerText = "Connecting...";
            // BUG FIX: the original hardcoded "wss://", which fails whenever the
            // page is served over plain HTTP.  Derive the socket scheme from the
            // page's own protocol instead.
            const wsScheme = location.protocol === "https:" ? "wss" : "ws";
            ws = new WebSocket(wsScheme + "://" + location.host + "/ws/transcribe");
            ws.binaryType = 'arraybuffer';

            ws.onopen = async function() {
                document.getElementById("status").innerText = "Connected";
                try {
                    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
                    // 16 kHz matches what the server-side transcriber expects
                    // for raw PCM16 frames.
                    audioContext = new AudioContext({ sampleRate: 16000 });
                    const source = audioContext.createMediaStreamSource(mediaStream);
                    analyser = audioContext.createAnalyser();
                    analyser.fftSize = 2048;
                    const bufferLength = analyser.frequencyBinCount;
                    const dataArray = new Uint8Array(bufferLength);
                    source.connect(analyser);
                    // NOTE(review): createScriptProcessor is deprecated in favor
                    // of AudioWorklet, but is kept here for behavioral parity.
                    scriptProcessor = audioContext.createScriptProcessor(512, 1, 1);
                    scriptProcessor.onaudioprocess = function(event) {
                        const inputData = event.inputBuffer.getChannelData(0);
                        const pcm16 = floatTo16BitPCM(inputData);
                        if (ws.readyState === WebSocket.OPEN) {
                            ws.send(pcm16);
                        }
                        // Draw the live waveform for this audio frame.
                        analyser.getByteTimeDomainData(dataArray);
                        canvasContext.fillStyle = 'rgb(200, 200, 200)';
                        canvasContext.fillRect(0, 0, canvas.width, canvas.height);
                        canvasContext.lineWidth = 2;
                        canvasContext.strokeStyle = 'rgb(0, 0, 0)';
                        canvasContext.beginPath();
                        let sliceWidth = canvas.width * 1.0 / bufferLength;
                        let x = 0;
                        for (let i = 0; i < bufferLength; i++) {
                            let v = dataArray[i] / 128.0;
                            let y = v * canvas.height / 2;
                            if (i === 0) {
                                canvasContext.moveTo(x, y);
                            } else {
                                canvasContext.lineTo(x, y);
                            }
                            x += sliceWidth;
                        }
                        canvasContext.lineTo(canvas.width, canvas.height / 2);
                        canvasContext.stroke();
                    };
                    source.connect(scriptProcessor);
                    scriptProcessor.connect(audioContext.destination);
                } catch (err) {
                    document.getElementById("status").innerText = "Error: " + err;
                }
            };

            ws.onmessage = function(event) {
                const data = JSON.parse(event.data);
                if (data.type === 'partial') {
                    // Partial hypotheses render in gray and get overwritten.
                    currentLine.style.color = 'gray';
                    currentLine.textContent = data.transcript + ' ';
                } else if (data.type === 'final') {
                    // Finalized text turns black and a fresh line is started.
                    currentLine.style.color = 'black';
                    currentLine.textContent = data.transcript;
                    currentLine = document.createElement('span');
                    document.getElementById('transcription').appendChild(document.createElement('br'));
                    document.getElementById('transcription').appendChild(currentLine);
                } else if (data.type === 'status') {
                    if (data.message === 'speaking_started') {
                        document.getElementById("speakingStatus").innerText = "Speaking Started";
                        document.getElementById("speakingStatus").style.color = "green";
                    } else if (data.message === 'speaking_stopped') {
                        document.getElementById("speakingStatus").innerText = "Speaking Stopped";
                        document.getElementById("speakingStatus").style.color = "red";
                    }
                }
            };

            ws.onclose = function() {
                // BUG FIX: release the microphone when the socket closes; the
                // original only closed the AudioContext, leaving the capture
                // track live (browser mic indicator stayed on).
                if (mediaStream) {
                    mediaStream.getTracks().forEach(function(track) { track.stop(); });
                }
                if (audioContext && audioContext.state !== 'closed') {
                    audioContext.close();
                }
                document.getElementById("status").innerText = "Closed";
            };
        }

        function switchModel() {
            const model = document.getElementById("modelSelect").value;
            if (ws && ws.readyState === WebSocket.OPEN) {
                if (model === "tiny") {
                    ws.send("switch_to_tiny");
                } else if (model === "base") {
                    ws.send("switch_to_base");
                }
            }
        }

        // Convert a Float32 [-1, 1] audio buffer to little-endian 16-bit PCM.
        function floatTo16BitPCM(input) {
            const buffer = new ArrayBuffer(input.length * 2);
            const output = new DataView(buffer);
            for (let i = 0; i < input.length; i++) {
                let s = Math.max(-1, Math.min(1, input[i]));
                output.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
            }
            return buffer;
        }
    </script>
</body>
</html>
"""