<!-- realtime-transcription / index-screen.html
commit 489ba9a ("add: big screen ui") by Sofia Casadei -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Real-time Whisper Transcription</title>
<style>
:root {
--background-dark: #000000;
--text-light: #ffffff;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
margin: 0; /* Removes default margin */
padding: 0; /* Removes default padding */
background-color: var(--background-dark); /* Sets background to black */
color: var(--text-light); /* Sets text to white */
min-height: 100vh; /* Ensures page fills entire viewport height */
}
/* Hide the header in presentation mode */
.hero {
display: none; /* Hides the hero section completely */
}
.container {
max-width: 100%; /* Makes container full width */
margin: 0; /* Removes margin */
padding: 1rem; /* Adds small padding all around */
}
/* Base styling for transcript container */
.transcript-container {
height: 90vh; /* Sets height to 90% of viewport height */
border: none; /* Removes border */
padding: 2rem; /* Adds generous padding inside */
background: var(--background-dark); /* Ensures background is black */
color: var(--text-light); /* Ensures text is white */
overflow-y: auto; /* Enables vertical scrolling when content overflows */
margin-bottom: 0; /* Removes bottom margin */
display: block; /* Makes element a block to take full width */
width: 100%; /* Sets width to 100% */
}
/* Styling for transcript paragraphs */
.transcript-container p {
margin: 0.5rem 0; /* Small vertical margin between paragraphs */
padding: 0.5rem 0; /* Small vertical padding within paragraphs */
background: transparent; /* Transparent background (no highlighting) */
border-radius: 0; /* No rounded corners */
line-height: 1.6; /* Increases line spacing for readability */
font-size: 3.5rem; /* Very large type (3.5x the root font size) for reading at a distance */
font-weight: 500; /* Medium weight (700 would be bold) */
max-width: 98%; /* Nearly full width, with a little breathing room */
white-space: normal; /* Allows text to wrap normally */
word-wrap: break-word; /* Prevents overflow of long words */
color: white; /* Explicitly sets text color to white */
display: block; /* Each paragraph takes full width */
}
/* Current paragraph styling - same transparent/white look as finished paragraphs; the .current class is the hook for any emphasis */
.transcript-container p.current {
background: transparent; /* No highlight */
color: rgba(255, 255, 255, 1.0); /* Full-brightness white */
}
/* Older paragraphs (4th-from-last and earlier) keep full opacity, so history stays readable (no fade-out) */
.transcript-container p:nth-last-child(n+4) {
opacity: 1.0; /* Full opacity */
}
/* Controls for starting/stopping transcription */
.controls {
position: fixed; /* Fixes controls to viewport */
bottom: 2rem; /* Positions 2rem from bottom */
right: 2rem; /* Positions 2rem from right */
margin: 0; /* No margin */
opacity: 0.8; /* Slightly transparent when not hovered */
transition: opacity 0.3s ease; /* Smooth transition for opacity changes */
z-index: 1000; /* Ensures controls appear above other elements */
}
.controls:hover {
opacity: 1; /* Full opacity on hover */
}
/* Button styling - orange with black text for good contrast */
button {
background: rgba(249, 164, 92, 1.0); /* Solid orange background */
backdrop-filter: blur(5px); /* Blurs what's behind the button (only visible if the background were translucent) */
font-size: 1.2rem; /* Large text */
min-width: 160px; /* Minimum width for button */
padding: 15px 30px; /* Generous padding inside button */
color: black !important; /* Forces black text color */
font-weight: bold; /* Bold text for better visibility */
border: 2px solid rgba(255, 255, 255, 0.2); /* Subtle border */
border-radius: 8px; /* Rounded corners */
cursor: pointer; /* Shows pointer cursor on hover */
transition: all 0.2s ease; /* Smooth transition for hover effects */
display: block; /* Block-level so the button sits on its own line (buttons still size to content) */
}
button:hover {
background: rgba(249, 164, 92, 0.9); /* Slightly more transparent on hover */
transform: translateY(-2px); /* Slight upward movement on hover */
}
/* Spinner animation for loading state */
.icon-with-spinner .spinner {
border: 3px solid black; /* Spinner border */
border-top: 3px solid transparent; /* Transparent top creates spinning effect */
border-radius: 50%; /* Makes it circular */
width: 24px; /* Width of spinner */
height: 24px; /* Height of spinner */
animation: spin 1s linear infinite; /* Animation for spinning effect */
}
@keyframes spin {
0% { transform: rotate(0deg); } /* Starting rotation */
100% { transform: rotate(360deg); } /* Full 360° rotation */
}
/* Recording indicator pulse animation */
.pulse-circle {
display: inline-block; /* Sits inline with the button label */
width: 12px; /* Width of pulse circle */
height: 12px; /* Height of pulse circle */
border-radius: 50%; /* Makes it circular */
background-color: red; /* Red color for recording indicator */
margin-right: 8px; /* Space to right of circle */
animation: pulse 1.5s ease infinite; /* Continuous pulsing animation */
}
@keyframes pulse {
0% { transform: scale(0.95); opacity: 0.7; } /* Slightly smaller and transparent */
50% { transform: scale(1.1); opacity: 1; } /* Larger and fully opaque */
100% { transform: scale(0.95); opacity: 0.7; } /* Back to starting state */
}
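/* Layout helpers for the button's inner markup - an assumed addition: updateButtonState() emits .icon-with-spinner and .pulse-container, but no rules for those containers existed here */
.icon-with-spinner,
.pulse-container {
display: flex; /* Lay spinner/circle and label out in a row */
align-items: center; /* Vertically center them */
justify-content: center; /* Center within the button */
gap: 8px; /* Space between icon and label */
}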
/* Custom scrollbar styling */
.transcript-container::-webkit-scrollbar {
width: 8px; /* Width of scrollbar */
}
.transcript-container::-webkit-scrollbar-track {
background: var(--background-dark); /* Black scrollbar track */
}
.transcript-container::-webkit-scrollbar-thumb {
background: rgba(249, 164, 92, 0.3); /* Semi-transparent orange scrollbar thumb */
border-radius: 4px; /* Rounded corners on scrollbar thumb */
}
/* Error toast styling */
.toast {
background: rgba(0, 0, 0, 0.8); /* Semi-transparent black background */
backdrop-filter: blur(5px); /* Blur effect behind toast */
color: var(--text-light); /* White text */
font-size: 1.2rem; /* Large text size */
}
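/* Toast placement - an assumed refinement: without positioning, the toast renders in normal flow at the top of the page instead of as an overlay */
.toast {
display: none; /* Hidden until showError() reveals it */
position: fixed; /* Overlay, independent of page scroll */
top: 1rem; /* Near the top edge */
left: 50%; /* Horizontal centering, step 1 */
transform: translateX(-50%); /* Horizontal centering, step 2 */
padding: 1rem 2rem; /* Breathing room around the message */
border-radius: 8px; /* Match the button's rounded corners */
z-index: 1001; /* Above the fixed controls (z-index 1000) */
}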
</style>
</head>
<body>
<!-- Error message toast, shown when an error occurs -->
<div id="error-toast" class="toast"></div>
<!-- Header section (hidden in presentation mode) -->
<div class="hero">
<h1>Real-time Transcription</h1>
<p>Powered by FastRTC and Local Whisper 🤗</p>
</div>
<!-- Main content container -->
<div class="container">
<!-- Container for transcript text -->
<div class="transcript-container" id="transcript"></div>
<!-- Controls for starting/stopping recording -->
<div class="controls">
<button id="start-button">Start Recording</button>
</div>
</div>
<script>
// Global variables for WebRTC connection
let peerConnection; // Stores the WebRTC connection object for audio streaming
let webrtc_id; // A unique ID to identify this connection on the server
let audioContext, analyser, audioSource; // Audio processing objects for visualization
let audioLevel = 0; // Stores the current audio level (volume) from 0-1
let animationFrame; // Reference to the animation frame for audio visualization
let isRecording = false; // Tracks whether we're currently recording or not
let eventSource; // Object that receives transcription results from the server
// DOM element references
const startButton = document.getElementById('start-button'); // The button to start/stop recording
const transcriptDiv = document.getElementById('transcript'); // The container for transcription text
// Variables for managing the transcript display
let currentParagraph = null; // Reference to the current paragraph being updated
let lastUpdateTime = Date.now(); // Timestamp of when we last updated the transcript
// Show error messages to the user in a toast notification
function showError(message) {
const toast = document.getElementById('error-toast'); // Get the toast element
toast.textContent = message; // Set the error message
toast.style.display = 'block'; // Make the toast visible
// Hide toast after 5 seconds
setTimeout(() => {
toast.style.display = 'none'; // Hide the toast
}, 5000);
}
// Handle messages received from the server through WebRTC data channel
function handleMessage(event) {
// Parse JSON message
const eventJson = JSON.parse(event.data);
// Display errors to the user
if (eventJson.type === "error") {
showError(eventJson.message);
}
// Log all messages to console for debugging
console.log('Received message:', event.data);
}
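// Message shape (a sketch - the exact fields are defined by the server):
// { "type": "error", "message": "..." } -> surfaced via showError();
// any other type is only logged.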
// Update button appearance based on connection state
function updateButtonState() {
// If connecting, show spinner
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
startButton.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>Connecting...</span>
</div>
`;
isRecording = false; // Not recording while connecting
// If connected, show pulsing recording indicator
} else if (peerConnection && peerConnection.connectionState === 'connected') {
startButton.innerHTML = `
<div class="pulse-container">
<div class="pulse-circle"></div>
<span>Stop Recording</span>
</div>
`;
isRecording = true; // Set recording state to true
// Default state - ready to start
} else {
startButton.innerHTML = 'Start Recording';
isRecording = false; // Not recording when not connected
}
}
// Set up audio visualization to show when the user is speaking
function setupAudioVisualization(stream) {
// Create or resume the audio context
if (!audioContext) {
// Create new audio context with browser compatibility handling
audioContext = new (window.AudioContext || window.webkitAudioContext)();
} else {
// Resume context if it was suspended
if (audioContext.state === 'suspended') {
audioContext.resume();
}
}
// Create audio analyzer for processing audio data
analyser = audioContext.createAnalyser();
// Create media source from microphone stream
audioSource = audioContext.createMediaStreamSource(stream);
// Connect source to analyzer
audioSource.connect(analyser);
// Set FFT size (controls frequency data resolution)
analyser.fftSize = 64;
// Create array to store frequency data
const dataArray = new Uint8Array(analyser.frequencyBinCount);
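// With fftSize = 64 there are 32 frequency bins - coarse, but enough for a level meter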
// Function to continuously update audio level visualization
function updateAudioLevel() {
// Get audio frequency data
analyser.getByteFrequencyData(dataArray);
// Average the bin magnitudes for a rough volume estimate
const average = dataArray.reduce((a, b) => a + b, 0) / dataArray.length;
// Convert to 0-1 scale
audioLevel = average / 255;
// Expose the level as a CSS custom property on the pulse circle
// (the current pulse keyframes don't read --audio-level; it is
// available for any CSS that wants to scale with volume)
const pulseCircle = document.querySelector('.pulse-circle');
if (pulseCircle) {
pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
}
// Continue animation loop
animationFrame = requestAnimationFrame(updateAudioLevel);
}
// Start audio visualization loop
updateAudioLevel();
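// (the loop self-schedules via requestAnimationFrame; stop() cancels it)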
}
// Set up WebRTC connection for streaming audio to server
async function setupWebRTC() {
// RTC configuration (e.g. ICE servers): __RTC_CONFIGURATION__ is a
// placeholder the server substitutes when serving this page
const config = __RTC_CONFIGURATION__;
// Create new peer connection
peerConnection = new RTCPeerConnection(config);
// Set connection timeout (15 seconds)
const connectionTimeout = setTimeout(() => {
if (peerConnection && peerConnection.connectionState !== 'connected') {
showError('Connection timeout. Please check your network and try again.');
stop(); // Stop connection attempt
}
}, 15000);
// Set warning for slow connection (5 seconds)
const timeoutId = setTimeout(() => {
const toast = document.getElementById('error-toast');
toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
toast.className = 'toast warning';
toast.style.display = 'block';
// Hide warning after 5 seconds
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}, 5000);
try {
// Request access to user's microphone
const stream = await navigator.mediaDevices.getUserMedia({
audio: true // Only request audio access
});
// Set up audio visualization
setupAudioVisualization(stream);
// Add audio tracks to WebRTC connection
stream.getTracks().forEach(track => {
peerConnection.addTrack(track, stream);
});
// Monitor connection state changes
peerConnection.addEventListener('connectionstatechange', () => {
// Log state changes
console.log('connectionstatechange', peerConnection.connectionState);
// Handle successful connection
if (peerConnection.connectionState === 'connected') {
clearTimeout(timeoutId);
clearTimeout(connectionTimeout);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
// Handle connection failures
} else if (peerConnection.connectionState === 'failed' ||
peerConnection.connectionState === 'disconnected' ||
peerConnection.connectionState === 'closed') {
showError('Connection lost. Please try again.');
stop();
}
// Update button appearance
updateButtonState();
});
// Create data channel for server messages
const dataChannel = peerConnection.createDataChannel('text');
dataChannel.onmessage = handleMessage; // Set message handler
// Create connection offer
const offer = await peerConnection.createOffer();
// Set local description (our end of connection)
await peerConnection.setLocalDescription(offer);
// Wait for ICE gathering to complete (finding connection methods)
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve(); // Already complete
} else {
// Function to check ICE gathering state
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve(); // Complete gathering
}
};
// Listen for ICE gathering state changes
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
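// (Waiting for complete gathering means sending one non-trickle offer -
// a common simplification when the handshake is a single HTTP POST)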
// Generate random ID for this connection
webrtc_id = Math.random().toString(36).substring(7);
// Send connection offer to server
const response = await fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
sdp: peerConnection.localDescription.sdp, // Session description
type: peerConnection.localDescription.type, // Offer type
webrtc_id: webrtc_id // Unique connection ID
})
});
// Parse server response
const serverResponse = await response.json();
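// Response shapes, as assumed from the usage below: on failure,
// { status: 'failed', meta: { error, limit? } }; otherwise the SDP
// answer ({ sdp, type }) handed to setRemoteDescription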
// Handle server errors
if (serverResponse.status === 'failed') {
showError(serverResponse.meta.error === 'concurrency_limit_reached'
? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
: serverResponse.meta.error);
stop();
startButton.textContent = 'Start Recording';
return;
}
// Complete connection with server's description
await peerConnection.setRemoteDescription(serverResponse);
// Create event source for receiving transcription results
eventSource = new EventSource('/transcript?webrtc_id=' + webrtc_id);
// Handle event source errors
eventSource.onerror = (event) => {
console.error("EventSource error:", event);
showError("Transcription connection lost. Please try again.");
};
// Process transcription results as they arrive
eventSource.addEventListener("output", (event) => {
console.log("Received transcript chunk:", event.data);
// Add text to display
appendTranscript(event.data);
// appendTranscriptSimple(event.data); // simpler alternative renderer (defined below)
});
} catch (err) {
// Handle any setup errors
clearTimeout(timeoutId);
console.error('Error setting up WebRTC:', err);
showError('Failed to establish connection. Please try again.');
stop();
startButton.textContent = 'Start Recording';
}
}
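// Simple fallback renderer: one paragraph per chunk, no merging
// (kept for debugging; see the commented-out call above)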
function appendTranscriptSimple(text) {
const p = document.createElement('p');
p.textContent = text;
transcriptDiv.appendChild(p);
transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
}
// Add transcription text to display
function appendTranscript(text) {
// Clean up text
const formattedText = text.trim();
if (!formattedText) return;
const now = Date.now();
const timeSinceLastUpdate = now - lastUpdateTime;
lastUpdateTime = now;
// Handle transcript display
if (!currentParagraph) {
// Create new paragraph
currentParagraph = document.createElement('p');
currentParagraph.classList.add('current');
transcriptDiv.appendChild(currentParagraph);
currentParagraph.textContent = formattedText;
} else {
// Get current text
const currentText = currentParagraph.textContent;
// Fix spacing issues by normalizing
let cleanedText = formattedText;
// 1. Deduplicate: if the new chunk starts by repeating the last
// word of the current text, drop the repeated word
const words = currentText.split(/\s+/);
const lastWord = words[words.length - 1].replace(/[^\w]/g, '').toLowerCase();
if (lastWord && lastWord.length > 2) {
// Match the repeated word at the start (tolerating leading
// punctuation, with a word boundary so prefixes don't match)
const regex = new RegExp(`^\\W*${lastWord}\\b`, 'i');
if (regex.test(cleanedText)) {
cleanedText = cleanedText.replace(regex, '').trim();
}
}
// 2. Add proper spacing
let finalText = currentText;
// Only add space if current text doesn't end with space or punctuation
// and new text doesn't start with punctuation
if (!/[\s.,!?]$/.test(finalText) && !/^[.,!?]/.test(cleanedText) && cleanedText) {
finalText += ' ';
}
// 3. Add the cleaned text
finalText += cleanedText;
// 4. Fix any run-together words by adding spaces after punctuation
finalText = finalText.replace(/([.,!?])([a-zA-Z])/g, '$1 $2');
// Update the paragraph text
currentParagraph.textContent = finalText;
}
// Create new paragraph on sentence end or pause
if (/[.!?]$/.test(formattedText) || timeSinceLastUpdate > 5000) {
// End current paragraph
if (currentParagraph) {
currentParagraph.classList.remove('current');
}
// Prepare for next paragraph
currentParagraph = null;
}
// Keep only the 10 most recent paragraphs
// (getElementsByTagName returns a live collection, so length shrinks as we remove)
const paragraphs = transcriptDiv.getElementsByTagName('p');
while (paragraphs.length > 10) {
transcriptDiv.removeChild(paragraphs[0]);
}
// Scroll to show newest text
requestAnimationFrame(() => {
transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
});
}
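// Example of the merge heuristic above (hypothetical chunks):
// current: "let's talk" + incoming: "talk about WebRTC."
// -> duplicate "talk" dropped -> "let's talk about WebRTC."
// The trailing "." (or a >5s gap) then closes the paragraph.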
// Stop recording and clean up resources
function stop() {
// Stop audio visualization
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
// Pause audio processing
if (audioContext) {
audioContext.suspend();
}
// Stop all media tracks
if (peerConnection) {
const senders = peerConnection.getSenders();
if (senders) {
senders.forEach(sender => {
if (sender.track) {
sender.track.stop(); // Release microphone
}
});
}
// Close WebRTC connection
peerConnection.close();
peerConnection = null;
}
// Close transcription connection
if (eventSource) {
eventSource.close();
eventSource = null;
}
// Reset audio level
audioLevel = 0;
// Update button display
updateButtonState();
// Ask about clearing the transcript (note: stop() also runs on
// connection errors, so this prompt can appear after a failure)
if (window.confirm('Clear transcript?')) {
// Clear all transcript text
transcriptDiv.innerHTML = '';
currentParagraph = null;
} else {
// Just end current paragraph
if (currentParagraph) {
currentParagraph.classList.remove('current');
currentParagraph = null;
}
}
// Reset timestamp
lastUpdateTime = Date.now();
}
// Clean up resources when page is closed
window.addEventListener('beforeunload', () => {
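// Note: most browsers suppress confirm() during unload, so stop()'s
// "Clear transcript?" prompt will not block page close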
stop(); // Stop recording and release resources
});
// Handle start/stop button clicks
startButton.addEventListener('click', () => {
if (!isRecording) {
// Start recording if not already recording
setupWebRTC();
} else {
// Stop recording if currently recording
stop();
}
});
// Initialize UI when page loads
document.addEventListener('DOMContentLoaded', () => {
// Set initial visibility: transcript and button shown, toast hidden
const elementsToCheck = [
transcriptDiv,
startButton,
document.getElementById('error-toast')
];
elementsToCheck.forEach(el => {
if (el) {
el.style.display = el.id === 'error-toast' ? 'none' : 'block';
}
});
// Reapply theme colors via the CSS variables
document.body.style.backgroundColor = 'var(--background-dark)';
document.body.style.color = 'var(--text-light)';
// Force button colors for consistency
startButton.style.backgroundColor = 'rgba(249, 164, 92, 1.0)';
startButton.style.color = 'black';
});
</script>
</body>
</html>