Update app.py
app.py CHANGED
@@ -2,89 +2,73 @@ import streamlit as st
 from transformers import pipeline
 import numpy as np
 import torchaudio
-from io import BytesIO
 from audio_recorder_streamlit import audio_recorder
 import torch
+from io import BytesIO

-# Load Whisper model
+# Load Whisper model (cached)
 @st.cache_resource
 def load_model():
     return pipeline("automatic-speech-recognition", model="openai/whisper-base")

+# Audio processing function
+def process_audio(audio_bytes):
+    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
+    if waveform.shape[0] > 1:  # Convert stereo to mono
+        waveform = torch.mean(waveform, dim=0, keepdim=True)
+    if sample_rate != 16000:  # Resample to 16kHz if needed
+        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+        waveform = resampler(waveform)
+    return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
+
+# Streamlit App
+st.title("Real-Time Voice Typing")
+st.write("Type or speak - text will appear instantly!")
+
+# Initialize text in session state
+if 'text_input' not in st.session_state:
+    st.session_state.text_input = ""
+
+# Main text area (auto-updates from session state)
+text_input = st.text_area(
+    "Your text will appear here:",
+    value=st.session_state.text_input,
+    height=300,
+    key="text_area"
+)
+
+# Audio recorder component
+audio_bytes = audio_recorder(
+    pause_threshold=2.0,  # Stop after 2 seconds of silence
+    text="Speak to type",
+    recording_color="#e8b62c",
+    neutral_color="#6aa36f",
+)
+
+# Process audio in real-time
+if audio_bytes:
     try:
-        # Resample to 16kHz if needed (Whisper's expected sample rate)
-        if sample_rate != 16000:
-            resampler = torchaudio.transforms.Resample(
-                orig_freq=sample_rate,
-                new_freq=16000
-            )
-            waveform = resampler(waveform)
-            sample_rate = 16000
-
-        # Convert to numpy array
-        audio_np = waveform.numpy().squeeze()
-        return {"raw": audio_np, "sampling_rate": sample_rate}
+        audio_input = process_audio(audio_bytes)
+        whisper = load_model()
+        transcribed_text = whisper(audio_input)["text"]
+
+        # Append new transcription to existing text
+        st.session_state.text_input = st.session_state.text_input + " " + transcribed_text
+        st.experimental_rerun()  # Refresh to update text area
     except Exception as e:
-        st.error(f"
-        return None
-
-# Process audio when recording is available
-if audio_bytes:
-    audio_input = process_audio(audio_bytes)
-    if audio_input:
-        try:
-            # Transcribe audio
-            whisper = load_model()
-            transcribed_text = whisper(audio_input)["text"]
-
-            # Update session state
-            st.session_state.combined_text = f"{text_input}\n{transcribed_text}".strip()
-        except Exception as e:
-            st.error(f"Transcription error: {str(e)}")
-
-# Combine inputs when button is clicked
-if st.button("Submit"):
-    if not text_input and not audio_bytes:
-        st.warning("Please enter text or record audio")
-    else:
-        # Display combined text
-        st.subheader("Combined Input:")
-        st.write(st.session_state.combined_text)
+        st.error(f"Error: {str(e)}")
+
+# Control buttons
+col1, col2 = st.columns(2)
+with col1:
+    if st.button("Clear Text"):
+        st.session_state.text_input = ""
+        st.experimental_rerun()
+with col2:
+    st.download_button(
+        "Download Text",
+        data=st.session_state.text_input,
+        file_name="voice_typed.txt",
+        mime="text/plain"
+    )
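A note on the rerun pattern in the new version: the audio_recorder component can keep returning the bytes of the last recording on subsequent reruns, so calling st.experimental_rerun() right after appending the transcription may transcribe the same clip more than once (and st.experimental_rerun has since been replaced by st.rerun in newer Streamlit releases). Below is a minimal sketch, not part of the commit, of one way to guard against that as a drop-in variant of the `if audio_bytes:` block; the "last_clip_id" session key is a hypothetical name.

# Sketch only - assumes process_audio(), load_model() and audio_bytes exist as in app.py above.
import hashlib  # add to the imports at the top of app.py

if audio_bytes:
    clip_id = hashlib.sha256(audio_bytes).hexdigest()
    if st.session_state.get("last_clip_id") != clip_id:  # skip clips already transcribed
        st.session_state["last_clip_id"] = clip_id
        try:
            audio_input = process_audio(audio_bytes)
            whisper = load_model()
            transcribed_text = whisper(audio_input)["text"]
            st.session_state.text_input = (st.session_state.text_input + " " + transcribed_text).strip()
            st.rerun()  # st.experimental_rerun() on older Streamlit versions
        except Exception as e:
            st.error(f"Error: {str(e)}")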
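To sanity-check the new process_audio helper outside the Space, a scratch script along these lines should work; "sample.wav" is a placeholder for any short local recording, and the function body is copied from the commit above rather than imported.

# Sketch only - quick offline check that process_audio() output feeds the Whisper pipeline.
from io import BytesIO
import torch
import torchaudio
from transformers import pipeline

def process_audio(audio_bytes):
    waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
    if waveform.shape[0] > 1:  # stereo -> mono
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    if sample_rate != 16000:  # resample to Whisper's 16 kHz
        waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
    return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}

with open("sample.wav", "rb") as f:  # placeholder path
    audio_input = process_audio(f.read())

asr = pipeline("automatic-speech-recognition", model="openai/whisper-base")
print(asr(audio_input)["text"])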