Update app.py
app.py
CHANGED
@@ -23,8 +23,8 @@ from transformers import pipeline
 
 @st.cache_resource
 def load_voice_model():
-
-
+    if 'whisper_model' not in st.session_state:
+        st.session_state.whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-base")
 
 def process_audio(audio_bytes):
     waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
@@ -35,71 +35,33 @@ def process_audio(audio_bytes):
     waveform = resampler(waveform)
     return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
 
-
 def get_voice_transcription(state_key):
     """Display audio recorder for a given key.
    If new audio is recorded, transcribe it and update the session state.
    """
     if state_key not in st.session_state:
         st.session_state[state_key] = ""
-
     # Use a unique key for the recorder widget
     audio_bytes = audio_recorder(key=state_key + "_audio",
-
-
-
-
-
+                                 pause_threshold=0.8,
+                                 text="Speak to type",
+                                 recording_color="#e8b62c",
+                                 neutral_color="#6aa36f")
     if audio_bytes:
         current_hash = hashlib.md5(audio_bytes).hexdigest()
         last_hash_key = state_key + "_last_hash"
-
         if st.session_state.get(last_hash_key, "") != current_hash:
             st.session_state[last_hash_key] = current_hash
-
-            # Create a status element
-            status = st.empty()
             try:
-                # Show loading message
-                status.markdown("""
-                <div style="display: flex; align-items: center; gap: 0.5rem; padding: 0.5rem;
-                background: #f0f2f6; border-radius: 8px;">
-                <div class="loader"></div>
-                <span>Processing your voice...</span>
-                </div>
-                <style>
-                .loader {
-                    border: 3px solid #f3f3f3;
-                    border-radius: 50%;
-                    border-top: 3px solid #6C63FF;
-                    width: 20px;
-                    height: 20px;
-                    animation: spin 1s linear infinite;
-                }
-                @keyframes spin {
-                    0% { transform: rotate(0deg); }
-                    100% { transform: rotate(360deg); }
-                }
-                </style>
-                """, unsafe_allow_html=True)
-
-                # Process audio
                 audio_input = process_audio(audio_bytes)
                 whisper = load_voice_model()
                 transcribed_text = whisper(audio_input)["text"]
-
-                # Clear loading and show result
-                status.empty()
                 st.info(f"📝 Transcribed: {transcribed_text}")
-
-                # Update session state
+                # Append (or set) new transcription
                 st.session_state[state_key] += (" " + transcribed_text).strip()
                 st.experimental_rerun()
-
             except Exception as e:
-                status.empty()
                 st.error(f"Voice input error: {str(e)}")
-
     return st.session_state[state_key]
 
 ######################################
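One thing worth flagging in the new load_voice_model(): it stores the pipeline in st.session_state.whisper_model but no longer returns it, so whisper = load_voice_model() binds None and whisper(audio_input) raises a TypeError (surfaced through the st.error handler) on the first recording. A minimal sketch of a fix, assuming the callers shown above keep using the return value; since @st.cache_resource already caches the returned object across reruns and sessions, the session_state bookkeeping can be dropped entirely:

@st.cache_resource
def load_voice_model():
    # st.cache_resource memoizes the returned object, so the pipeline
    # is built once per process and reused on every rerun.
    return pipeline("automatic-speech-recognition", model="openai/whisper-base")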
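For context, a sketch of how the transcription helper might be wired into the rest of the app; the label and state key below are hypothetical, not taken from this Space, and it assumes the imports already present in app.py:

# Hypothetical caller: seed a text input with the accumulated transcription.
voice_text = get_voice_transcription("question_input")
question = st.text_input("Ask a question", value=voice_text)

Note also that st.experimental_rerun() was later deprecated in favor of st.rerun(), so the rerun call may need updating depending on the Streamlit version the Space pins.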