Spaces:

iisadia
/

KASOTI_GAME

Running

App Files Files Community

KASOTI_GAME / app.py

iisadia

Update app.py

4ac719f verified 21 days ago

raw

history blame contribute delete

2.82 kB

	import streamlit as st
	from transformers import pipeline
	import numpy as np
	import torchaudio
	from audio_recorder_streamlit import audio_recorder
	import torch
	from io import BytesIO
	import hashlib

	# Load Whisper model (cached)
	@st.cache_resource
	def load_model():
	    """Build the Whisper speech-recognition pipeline.

	    The function is wrapped in ``st.cache_resource``, so the pipeline object
	    is constructed once and handed back on later calls instead of being
	    rebuilt on every script rerun.

	    Returns:
	        The object returned by ``transformers.pipeline`` for the
	        "automatic-speech-recognition" task using "openai/whisper-base".
	    """
	    task = "automatic-speech-recognition"
	    checkpoint = "openai/whisper-base"
	    return pipeline(task, model=checkpoint)

	# Audio processing function
	def process_audio(audio_bytes, target_sample_rate=16000):
	    """Decode an encoded audio clip into the input dict given to the ASR pipeline.

	    The clip is decoded with ``torchaudio.load``, down-mixed to a single
	    channel when it has more than one, and resampled when its rate differs
	    from ``target_sample_rate``.

	    Args:
	        audio_bytes: Encoded audio file content as ``bytes``; it is wrapped
	            in a ``BytesIO`` and handed to ``torchaudio.load``.
	        target_sample_rate: Sampling rate (Hz) of the returned samples.
	            Defaults to 16000, the rate that used to be hard-coded here.

	    Returns:
	        A dict with two keys:
	            "raw": the samples as a NumPy array with the channel axis removed.
	            "sampling_rate": ``target_sample_rate``.

	    Raises:
	        Whatever ``torchaudio.load`` raises when the bytes cannot be decoded.
	    """
	    # NOTE(review): assumes torchaudio.load returns a (channels, frames)
	    # tensor, which is what the channel check on shape[0] below relies on.
	    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))

	    if waveform.shape[0] > 1:  # Convert multi-channel audio to mono
	        waveform = waveform.mean(dim=0, keepdim=True)

	    if sample_rate != target_sample_rate:  # Resample only when needed
	        resampler = torchaudio.transforms.Resample(
	            orig_freq=sample_rate,
	            new_freq=target_sample_rate,
	        )
	        waveform = resampler(waveform)

	    # Drop only the channel axis. A bare squeeze() would also collapse a
	    # one-sample clip into a 0-d array instead of a length-1 vector.
	    samples = waveform.squeeze(0).numpy()
	    return {"raw": samples, "sampling_rate": target_sample_rate}

	# Streamlit App
	# Page header
	st.title("Real-Time Voice Typing")
	st.write("Type or speak - text will appear instantly!")

	# Seed the per-session values on the first run only; on later reruns the
	# keys already exist and their stored values are left untouched.
	#   text_input      - the accumulated text
	#   last_audio_hash - digest of the most recently handled recording
	for state_key in ("text_input", "last_audio_hash"):
	    if state_key not in st.session_state:
	        st.session_state[state_key] = ""

	# Main text area
	def _sync_typed_text():
	    """Copy what the user typed in the text area into the canonical text store."""
	    st.session_state.text_input = st.session_state.text_area


	# Main text area.
	# ``st.session_state.text_input`` is the single source of truth for the text:
	#   * typed edits are written back to it by the on_change callback, so they
	#     are included in the download and kept when a transcription is appended;
	#   * programmatic changes (appended transcriptions, "Clear Text") are pushed
	#     into the widget's own state here, before the widget is instantiated,
	#     which is the point in a run where a widget's state may still be set.
	# No ``value=`` is passed, so the widget content comes from session state only.
	st.session_state.text_area = st.session_state.text_input
	text_input = st.text_area(
	    "Your text will appear here:",
	    height=300,
	    key="text_area",
	    on_change=_sync_typed_text,
	)

	# Audio recorder component; yields the recorded clip (falsy when there is none)
	audio_bytes = audio_recorder(
	    pause_threshold=0.8,  # Shorter pause threshold
	    text="Speak to type",
	    recording_color="#e8b62c",
	    neutral_color="#6aa36f",
	)

	# Process new audio only if it's different from last time
	if audio_bytes:
	    st.info("🎤 Audio received!")
	    current_hash = hashlib.md5(audio_bytes).hexdigest()
	    if current_hash != st.session_state.last_audio_hash:
	        # Record the digest before transcribing: the same clip is then
	        # skipped on later reruns, even if transcribing it fails below.
	        st.session_state.last_audio_hash = current_hash
	        try:
	            # Only decoding and inference are guarded, so the error box
	            # reports genuine audio/model failures and nothing else.
	            audio_input = process_audio(audio_bytes)
	            whisper = load_model()
	            transcribed_text = whisper(audio_input)["text"]
	        except Exception as e:
	            st.error(f"Error: {str(e)}")
	        else:
	            st.info(f"📝 Transcribed: {transcribed_text}")

	            # Normalise once so the duplicate check and the appended text
	            # agree; surrounding whitespace from the model is dropped.
	            new_text = transcribed_text.strip()
	            existing_text = st.session_state.text_input

	            # Append new transcription only if different
	            if new_text and not existing_text.endswith(new_text):
	                # Avoid a leading space when the box is still empty.
	                st.session_state.text_input = (
	                    f"{existing_text} {new_text}" if existing_text else new_text
	                )
	                st.success("✅ Text added to box!")
	                # Kept outside the try/except above: the rerun is control
	                # flow and must never be handled as a transcription error.
	                st.rerun()

	# Control buttons
	col1, col2 = st.columns(2)
	with col1:
	    if st.button("Clear Text"):
	        st.session_state.text_input = ""
	        # last_audio_hash is deliberately left untouched. The hash guard in
	        # the audio-handling code exists because the same clip can be seen
	        # again on a rerun; resetting the hash here would make that clip
	        # look new, so it would be transcribed again and refill the box
	        # that was just cleared.
	        st.rerun()
	with col2:
	    # Offers the accumulated text as a plain-text file download.
	    st.download_button(
	        "Download Text",
	        data=st.session_state.text_input,
	        file_name="voice_typed.txt",
	        mime="text/plain",
	    )