# KASOTI_GAME / app.py
# NOTE: the lines below were Hugging Face Hub page chrome captured in the
# paste ("Update app.py", commit 54d37c3, raw/history/blame links, 2.27 kB);
# kept here as comments so they no longer break the Python file.
import streamlit as st
from transformers import pipeline
import numpy as np
import torchaudio
from audio_recorder_streamlit import audio_recorder
import torch
from io import BytesIO
# Whisper ASR pipeline, constructed once and cached by Streamlit.
@st.cache_resource
def load_model():
    """Return the cached openai/whisper-base speech-recognition pipeline."""
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-base",
    )
# Audio preprocessing for Whisper.
def process_audio(audio_bytes):
    """Decode recorded audio bytes into Whisper's expected input format.

    Returns a dict {"raw": 1-D float numpy array, "sampling_rate": 16000}.
    """
    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
    # Downmix multi-channel recordings to mono by averaging the channels.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Whisper expects 16 kHz audio; resample when the recording differs.
    if sample_rate != 16000:
        resample = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=16000
        )
        waveform = resample(waveform)
    return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
# Streamlit App
st.title("Real-Time Voice Typing")
st.write("Type or speak - text will appear instantly!")

# Accumulated transcript lives in session state so it survives reruns.
if 'text_input' not in st.session_state:
    st.session_state.text_input = ""
# Remember the last recording we transcribed: the recorder component returns
# the same bytes on every rerun, and without this guard the same clip was
# re-transcribed and appended again on each rerun.
if 'last_audio' not in st.session_state:
    st.session_state.last_audio = None

# Main text area (auto-updates from session state)
text_input = st.text_area(
    "Your text will appear here:",
    value=st.session_state.text_input,
    height=300,
    key="text_area"
)

# Audio recorder component
audio_bytes = audio_recorder(
    pause_threshold=2.0,  # Stop after 2 seconds of silence
    text="Speak to type",
    recording_color="#e8b62c",
    neutral_color="#6aa36f",
)

# Transcribe only when a *new* recording arrived (see last_audio guard above).
if audio_bytes and audio_bytes != st.session_state.last_audio:
    st.session_state.last_audio = audio_bytes
    try:
        audio_input = process_audio(audio_bytes)
        whisper = load_model()
        transcribed_text = whisper(audio_input)["text"]
        # Join with a space only when text already exists, so a fresh
        # transcript does not begin with a leading space.
        if st.session_state.text_input:
            st.session_state.text_input += " " + transcribed_text
        else:
            st.session_state.text_input = transcribed_text
        # NOTE(review): st.experimental_rerun() is deprecated (removed in
        # newer Streamlit); switch to st.rerun() when on >= 1.27.
        st.experimental_rerun()  # Refresh to update text area
    except Exception as e:
        st.error(f"Error: {str(e)}")

# Control buttons
col1, col2 = st.columns(2)
with col1:
    if st.button("Clear Text"):
        st.session_state.text_input = ""
        st.experimental_rerun()
with col2:
    st.download_button(
        "Download Text",
        data=st.session_state.text_input,
        file_name="voice_typed.txt",
        mime="text/plain"
    )