import streamlit as st
from transformers import pipeline
import numpy as np
import torchaudio
from audio_recorder_streamlit import audio_recorder
import torch
from io import BytesIO

# Load the Whisper ASR pipeline once and cache it across reruns
@st.cache_resource
def load_model():
    return pipeline("automatic-speech-recognition", model="openai/whisper-base")


# Convert recorded audio bytes to the mono, 16 kHz float array Whisper expects
def process_audio(audio_bytes):
    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
    if waveform.shape[0] > 1:  # Convert stereo to mono
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    if sample_rate != 16000:  # Resample to 16 kHz if needed
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = resampler(waveform)
    return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
# Streamlit App
st.title("Real-Time Voice Typing")
st.write("Type or speak - text will appear instantly!")
# Initialize the accumulated text in session state
if 'text_input' not in st.session_state:
    st.session_state.text_input = ""

# Main text area (auto-updates from session state)
text_input = st.text_area(
    "Your text will appear here:",
    value=st.session_state.text_input,
    height=300,
    key="text_area",
)

# Audio recorder component
audio_bytes = audio_recorder(
    pause_threshold=2.0,  # Stop after 2 seconds of silence
    text="Speak to type",
    recording_color="#e8b62c",
    neutral_color="#6aa36f",
)
# Transcribe newly recorded audio; the guard prevents the same recording
# from being reprocessed (and its text re-appended) on every rerun
if audio_bytes and audio_bytes != st.session_state.get("last_audio"):
    st.session_state["last_audio"] = audio_bytes
    try:
        audio_input = process_audio(audio_bytes)
        whisper = load_model()
        transcribed_text = whisper(audio_input)["text"]
        # Append the new transcription to the existing text
        st.session_state.text_input = st.session_state.text_input + " " + transcribed_text
    except Exception as e:
        st.error(f"Error: {str(e)}")
    else:
        st.rerun()  # Refresh so the text area picks up the new text
# Control buttons
col1, col2 = st.columns(2)
with col1:
    if st.button("Clear Text"):
        st.session_state.text_input = ""
        st.rerun()
with col2:
    st.download_button(
        "Download Text",
        data=st.session_state.text_input,
        file_name="voice_typed.txt",
        mime="text/plain",
    )
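
# Local setup note (a sketch under assumptions; nothing here is pinned by the script itself):
# running this app requires streamlit, transformers, torch, torchaudio, numpy, and
# audio-recorder-streamlit to be installed, and torchaudio needs an audio backend
# (e.g. soundfile) available to decode the recorded bytes. Assuming the file is
# saved as app.py, it can be launched with:
#     streamlit run app.py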