<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Real-time Whisper Transcription</title>
<style>
  /* Theme variables: black-background "presentation mode" palette. */
  :root {
    --background-dark: #000000;
    --text-light: #ffffff;
  }
  body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
    margin: 0;
    padding: 0;
    background-color: var(--background-dark);
    color: var(--text-light);
    min-height: 100vh; /* fill the viewport even with little content */
  }
  /* Header is hidden entirely in presentation mode. */
  .hero {
    display: none;
  }
  .container {
    max-width: 100%;
    margin: 0;
    padding: 1rem;
  }
  /* Scrollable region holding the transcription paragraphs. */
  .transcript-container {
    height: 90vh;
    border: none;
    padding: 2rem;
    background: var(--background-dark);
    color: var(--text-light);
    overflow-y: auto; /* scroll when the transcript overflows */
    margin-bottom: 0;
    display: block;
    width: 100%;
  }
  /* Large, high-contrast transcript text for reading at a distance. */
  .transcript-container p {
    margin: 0.5rem 0;
    padding: 0.5rem 0;
    background: transparent;
    border-radius: 0;
    line-height: 1.6;
    font-size: 3.5rem;
    font-weight: 500;
    max-width: 98%;
    white-space: normal;
    word-wrap: break-word; /* keep very long words from overflowing */
    color: white;
    display: block;
  }
  /* Paragraph currently being extended by incoming chunks. */
  .transcript-container p.current {
    background: transparent;
    color: rgba(255, 255, 255, 1.0);
  }
  /* Older paragraphs stay at full opacity (history remains readable).
     NOTE(review): with opacity 1.0 this rule is a no-op; it looks like a
     leftover hook for fading out older paragraphs. */
  .transcript-container p:nth-last-child(n+4) {
    opacity: 1.0;
  }
  /* Start/stop controls pinned to the bottom-right corner. */
  .controls {
    position: fixed;
    bottom: 2rem;
    right: 2rem;
    margin: 0;
    opacity: 0.8; /* slightly faded until hovered */
    transition: opacity 0.3s ease;
    z-index: 1000; /* keep above the transcript */
  }
  .controls:hover {
    opacity: 1;
  }
  /* Orange button with black text for contrast against the dark page. */
  button {
    background: rgba(249, 164, 92, 1.0);
    backdrop-filter: blur(5px);
    font-size: 1.2rem;
    min-width: 160px;
    padding: 15px 30px;
    color: black;
    font-weight: bold;
    border: 2px solid rgba(255, 255, 255, 0.2);
    border-radius: 8px;
    cursor: pointer;
    transition: all 0.2s ease;
    display: block;
  }
  button:hover {
    background: rgba(249, 164, 92, 0.9);
    transform: translateY(-2px);
  }
  /* Spinner shown while the WebRTC connection is being established. */
  .icon-with-spinner .spinner {
    border: 3px solid black;
    border-top: 3px solid transparent; /* transparent gap creates the spin illusion */
    border-radius: 50%;
    width: 24px;
    height: 24px;
    animation: spin 1s linear infinite;
  }
  @keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
  }
  /* Red recording indicator that pulses continuously. */
  .pulse-circle {
    display: inline-block;
    width: 12px;
    height: 12px;
    border-radius: 50%;
    background-color: red;
    margin-right: 8px;
    animation: pulse 1.5s ease infinite;
  }
  /* BUGFIX: --audio-level (range 1-2) is set from JS on .pulse-circle but was
     never read anywhere in the CSS, so the indicator ignored the mic level.
     Scaling the keyframes by it makes the pulse grow with input volume while
     degrading gracefully to the old animation via the var() fallback. */
  @keyframes pulse {
    0% { transform: scale(calc(var(--audio-level, 1) * 0.95)); opacity: 0.7; }
    50% { transform: scale(calc(var(--audio-level, 1) * 1.1)); opacity: 1; }
    100% { transform: scale(calc(var(--audio-level, 1) * 0.95)); opacity: 0.7; }
  }
  /* Slim custom scrollbar matching the theme (WebKit browsers only). */
  .transcript-container::-webkit-scrollbar {
    width: 8px;
  }
  .transcript-container::-webkit-scrollbar-track {
    background: var(--background-dark);
  }
  .transcript-container::-webkit-scrollbar-thumb {
    background: rgba(249, 164, 92, 0.3);
    border-radius: 4px;
  }
  /* Error toast; visibility is toggled from JS via style.display. */
  .toast {
    background: rgba(0, 0, 0, 0.8);
    backdrop-filter: blur(5px);
    color: var(--text-light);
    font-size: 1.2rem;
  }
</style>
</head>
<body>
  <!-- Error message container that slides in when needed -->
  <div id="error-toast" class="toast"></div>
  <!-- Header section (hidden in presentation mode) -->
  <div class="hero">
    <h1>Real-time Transcription</h1>
    <p>Powered by FastRTC and Local Whisper 🤗</p>
  </div>
  <!-- Main content container -->
  <div class="container">
    <!-- Container for transcript text -->
    <div class="transcript-container" id="transcript"></div>
    <!-- Controls for starting/stopping recording -->
    <div class="controls">
      <button id="start-button">Start Recording</button>
    </div>
  </div>
  <script>
// --- Global state shared by the functions below ---------------------------
let peerConnection; // Active RTCPeerConnection streaming mic audio to the server
let webrtc_id; // Random ID correlating this peer with its /transcript SSE stream
let audioContext, analyser, audioSource; // Web Audio objects used for level metering
let audioLevel = 0; // Latest averaged microphone level, 0-1 scale
let animationFrame; // requestAnimationFrame handle for the metering loop
let isRecording = false; // True while the peer connection state is 'connected'
let eventSource; // EventSource delivering transcription chunks from the server
// Frequently used DOM elements.
const startButton = document.getElementById('start-button'); // Start/stop toggle button
const transcriptDiv = document.getElementById('transcript'); // Transcript text container
// Transcript assembly state used by appendTranscript().
let currentParagraph = null; // The <p> currently being extended, or null
let lastUpdateTime = Date.now(); // Timestamp of the previous transcript chunk
// Surface an error to the user as a transient toast.
// The message stays visible for five seconds, then the toast is hidden again.
function showError(message) {
    const toastEl = document.getElementById('error-toast');
    toastEl.textContent = message;
    toastEl.style.display = 'block';
    // Auto-dismiss after 5 s.
    setTimeout(() => { toastEl.style.display = 'none'; }, 5000);
}
// Handle a message arriving on the WebRTC data channel.
// Payloads are JSON; an "error"-typed message is surfaced to the user via
// the toast, and every well-formed message is logged for debugging.
function handleMessage(event) {
    let eventJson;
    try {
        eventJson = JSON.parse(event.data);
    } catch (err) {
        // BUGFIX: a malformed payload previously threw out of the handler;
        // log and drop it instead of crashing the data-channel callback.
        console.error('Could not parse data-channel message:', event.data, err);
        return;
    }
    if (eventJson.type === "error") {
        showError(eventJson.message);
    }
    console.log('Received message:', event.data);
}
// Refresh the start/stop button so it mirrors the current connection state,
// and keep the isRecording flag in sync with it.
function updateButtonState() {
    const state = peerConnection ? peerConnection.connectionState : null;

    if (state === 'connecting' || state === 'new') {
        // Connection attempt in progress: show the spinner.
        startButton.innerHTML = `
            <div class="icon-with-spinner">
                <div class="spinner"></div>
                <span>Connecting...</span>
            </div>
        `;
        isRecording = false;
    } else if (state === 'connected') {
        // Live connection: show the pulsing recording indicator.
        startButton.innerHTML = `
            <div class="pulse-container">
                <div class="pulse-circle"></div>
                <span>Stop Recording</span>
            </div>
        `;
        isRecording = true;
    } else {
        // Idle, failed, or no connection object: ready to start again.
        startButton.innerHTML = 'Start Recording';
        isRecording = false;
    }
}
// Feed the microphone stream through a Web Audio analyser and continuously
// publish the measured input level to the recording indicator
// (.pulse-circle) via the --audio-level custom property.
function setupAudioVisualization(stream) {
    // Lazily create the shared AudioContext; resume it if a previous stop()
    // left it suspended.
    if (!audioContext) {
        const Ctx = window.AudioContext || window.webkitAudioContext;
        audioContext = new Ctx();
    } else if (audioContext.state === 'suspended') {
        audioContext.resume();
    }

    analyser = audioContext.createAnalyser();
    audioSource = audioContext.createMediaStreamSource(stream);
    audioSource.connect(analyser);
    analyser.fftSize = 64; // small FFT: coarse but cheap level metering

    const levels = new Uint8Array(analyser.frequencyBinCount);

    // Animation loop: sample the spectrum, average it down to one 0-1
    // level, and hand it to CSS.
    const tick = () => {
        analyser.getByteFrequencyData(levels);
        let sum = 0;
        for (const v of levels) sum += v;
        audioLevel = (sum / levels.length) / 255;

        const indicator = document.querySelector('.pulse-circle');
        if (indicator) {
            indicator.style.setProperty('--audio-level', 1 + audioLevel);
        }
        animationFrame = requestAnimationFrame(tick);
    };
    tick();
}
// Establish the WebRTC connection that streams microphone audio to the
// server, then subscribe to the server-sent transcription feed.
async function setupWebRTC() {
    // __RTC_CONFIGURATION__ is substituted by the server when this page is
    // rendered (ICE servers etc.).
    const config = __RTC_CONFIGURATION__;
    peerConnection = new RTCPeerConnection(config);

    // Hard timeout: give up entirely after 15 s without a connection.
    const connectionTimeout = setTimeout(() => {
        if (peerConnection && peerConnection.connectionState !== 'connected') {
            showError('Connection timeout. Please check your network and try again.');
            stop();
        }
    }, 15000);

    // Soft warning after 5 s: connection is slow but not yet failed.
    const timeoutId = setTimeout(() => {
        const toast = document.getElementById('error-toast');
        toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
        toast.className = 'toast warning';
        toast.style.display = 'block';
        setTimeout(() => {
            toast.style.display = 'none';
        }, 5000);
    }, 5000);

    // Cancel both pending timers. Called on every exit path so a stale
    // 15 s timer can never fire stop() / a bogus timeout toast later.
    const clearConnectionTimers = () => {
        clearTimeout(timeoutId);
        clearTimeout(connectionTimeout);
    };

    try {
        // Microphone access (audio only).
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: true
        });
        setupAudioVisualization(stream);
        // Attach all mic tracks to the peer connection.
        stream.getTracks().forEach(track => {
            peerConnection.addTrack(track, stream);
        });

        peerConnection.addEventListener('connectionstatechange', () => {
            console.log('connectionstatechange', peerConnection.connectionState);
            if (peerConnection.connectionState === 'connected') {
                clearConnectionTimers();
                const toast = document.getElementById('error-toast');
                toast.style.display = 'none';
            } else if (peerConnection.connectionState === 'failed' ||
                       peerConnection.connectionState === 'disconnected' ||
                       peerConnection.connectionState === 'closed') {
                // BUGFIX: previously the timers kept running here, so the
                // 15 s timeout could fire again after the connection died.
                clearConnectionTimers();
                showError('Connection lost. Please try again.');
                stop();
            }
            updateButtonState();
        });

        // Data channel over which the server sends status/error messages.
        const dataChannel = peerConnection.createDataChannel('text');
        dataChannel.onmessage = handleMessage;

        // Create our offer and wait for ICE candidate gathering to finish
        // before sending it to the server.
        const offer = await peerConnection.createOffer();
        await peerConnection.setLocalDescription(offer);
        await new Promise((resolve) => {
            if (peerConnection.iceGatheringState === "complete") {
                resolve();
            } else {
                const checkState = () => {
                    if (peerConnection.iceGatheringState === "complete") {
                        peerConnection.removeEventListener("icegatheringstatechange", checkState);
                        resolve();
                    }
                };
                peerConnection.addEventListener("icegatheringstatechange", checkState);
            }
        });

        // Random short ID correlating this peer with its transcript stream.
        webrtc_id = Math.random().toString(36).substring(7);

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: peerConnection.localDescription.sdp,
                type: peerConnection.localDescription.type,
                webrtc_id: webrtc_id
            })
        });

        const serverResponse = await response.json();
        if (serverResponse.status === 'failed') {
            // BUGFIX: cancel the timers before bailing out; stop() does not
            // clear them, so the hard timeout would later report a bogus
            // "Connection timeout" on top of the real error.
            clearConnectionTimers();
            showError(serverResponse.meta.error === 'concurrency_limit_reached'
                ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
                : serverResponse.meta.error);
            stop();
            startButton.textContent = 'Start Recording';
            return;
        }

        // Complete the handshake with the server's answer.
        await peerConnection.setRemoteDescription(serverResponse);

        // Server-sent events deliver the transcription text chunks.
        eventSource = new EventSource('/transcript?webrtc_id=' + webrtc_id);
        eventSource.onerror = (event) => {
            console.error("EventSource error:", event);
            showError("Transcription connection lost. Please try again.");
        };
        eventSource.addEventListener("output", (event) => {
            console.log("Received transcript chunk:", event.data);
            appendTranscript(event.data);
        });
    } catch (err) {
        // BUGFIX: only the 5 s warning timer was cleared here before; the
        // 15 s timer stayed armed and fired stop() + a second toast later.
        clearConnectionTimers();
        console.error('Error setting up WebRTC:', err);
        showError('Failed to establish connection. Please try again.');
        stop();
        startButton.textContent = 'Start Recording';
    }
}
// Minimal transcript renderer: one <p> per chunk, kept scrolled to the
// bottom. Alternative to appendTranscript(); retained for debugging.
function appendTranscriptSimple(text) {
    const paragraph = document.createElement('p');
    paragraph.textContent = text;
    transcriptDiv.appendChild(paragraph);
    transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
}
// Merge a transcription chunk into the display: extend the current
// paragraph, drop a duplicated boundary word, normalise spacing, start a
// new paragraph on sentence end or a long pause, and cap the history.
function appendTranscript(text) {
    const chunk = text.trim();
    if (!chunk) return;

    const now = Date.now();
    const sinceLast = now - lastUpdateTime;
    lastUpdateTime = now;

    if (!currentParagraph) {
        // First chunk of a fresh paragraph.
        currentParagraph = document.createElement('p');
        currentParagraph.classList.add('current');
        transcriptDiv.appendChild(currentParagraph);
        currentParagraph.textContent = chunk;
    } else {
        const existing = currentParagraph.textContent;
        let incoming = chunk;

        // The transcriber sometimes re-emits the word at the chunk
        // boundary; strip a leading duplicate of the previous final word.
        const priorWords = existing.split(/\s+/);
        const boundaryWord = priorWords[priorWords.length - 1].replace(/[^\w]/g, '').toLowerCase();
        if (boundaryWord && boundaryWord.length > 2) {
            const dupPattern = new RegExp(`^${boundaryWord}`, 'i');
            if (dupPattern.test(incoming.replace(/[^\w]/g, ''))) {
                incoming = incoming.replace(dupPattern, '').trim();
            }
        }

        // Join with a single space unless punctuation makes it redundant.
        let merged = existing;
        if (!/[\s.,!?]$/.test(merged) && !/^[.,!?]/.test(incoming) && incoming) {
            merged += ' ';
        }
        merged += incoming;

        // Re-insert a space after punctuation glued directly to a letter.
        merged = merged.replace(/([.,!?])([a-zA-Z])/g, '$1 $2');
        currentParagraph.textContent = merged;
    }

    // Finalise the paragraph at sentence end, or after a >5 s pause.
    if (/[.!?]$/.test(chunk) || sinceLast > 5000) {
        if (currentParagraph) {
            currentParagraph.classList.remove('current');
        }
        currentParagraph = null;
    }

    // Keep only the 10 most recent paragraphs on screen.
    const shown = transcriptDiv.getElementsByTagName('p');
    while (shown.length > 10) {
        transcriptDiv.removeChild(shown[0]);
    }

    // Keep the newest text in view.
    requestAnimationFrame(() => {
        transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
    });
}
// Tear down recording: halt the level-meter loop, release the microphone,
// close the peer connection and transcript stream, and reset the UI.
function stop() {
    // Halt the audio-level animation loop.
    if (animationFrame) {
        cancelAnimationFrame(animationFrame);
        animationFrame = null;
    }
    // Suspend (not close) the AudioContext so it can be resumed next time.
    if (audioContext) {
        audioContext.suspend();
    }
    if (peerConnection) {
        // Stop every outgoing track — this releases the microphone.
        const senders = peerConnection.getSenders();
        if (senders) {
            senders.forEach((sender) => {
                if (sender.track) {
                    sender.track.stop();
                }
            });
        }
        peerConnection.close();
        peerConnection = null;
    }
    // Shut down the server-sent-events transcript stream.
    if (eventSource) {
        eventSource.close();
        eventSource = null;
    }
    audioLevel = 0;
    updateButtonState();

    // Offer to wipe the transcript; otherwise just finalise the paragraph
    // that was in progress.
    if (window.confirm('Clear transcript?')) {
        transcriptDiv.innerHTML = '';
        currentParagraph = null;
    } else if (currentParagraph) {
        currentParagraph.classList.remove('current');
        currentParagraph = null;
    }
    lastUpdateTime = Date.now();
}
// Release the microphone and connections if the tab closes mid-recording.
window.addEventListener('beforeunload', () => {
    stop();
});

// One button toggles between starting and stopping recording.
startButton.addEventListener('click', () => {
    isRecording ? stop() : setupWebRTC();
});

// One-time UI initialisation once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    // Transcript and button start visible; the error toast starts hidden.
    const managedElements = [
        transcriptDiv,
        startButton,
        document.getElementById('error-toast')
    ];
    managedElements.forEach((el) => {
        if (!el) return;
        const isButton = el.tagName.toLowerCase() === 'button';
        el.style.display = (isButton || el.id === 'transcript') ? 'block' : 'none';
    });
    // Re-assert the dark theme and button colours inline for consistency.
    document.body.style.backgroundColor = 'var(--background-dark)';
    document.body.style.color = 'var(--text-light)';
    startButton.style.backgroundColor = 'rgba(249, 164, 92, 1.0)';
    startButton.style.color = 'black';
});
  </script>
</body>
</html>