Update app.py
app.py
CHANGED
@@ -23,8 +23,8 @@ from transformers import pipeline
 
 @st.cache_resource
 def load_voice_model():
-
-
+    if 'whisper_model' not in st.session_state:
+        st.session_state.whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-base")
 
 def process_audio(audio_bytes):
     waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
@@ -35,71 +35,33 @@ def process_audio(audio_bytes):
     waveform = resampler(waveform)
     return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
 
-
 def get_voice_transcription(state_key):
     """Display audio recorder for a given key.
    If new audio is recorded, transcribe it and update the session state.
    """
     if state_key not in st.session_state:
         st.session_state[state_key] = ""
-
     # Use a unique key for the recorder widget
     audio_bytes = audio_recorder(key=state_key + "_audio",
-
-
-
-
-
+                                 pause_threshold=0.8,
+                                 text="Speak to type",
+                                 recording_color="#e8b62c",
+                                 neutral_color="#6aa36f")
     if audio_bytes:
         current_hash = hashlib.md5(audio_bytes).hexdigest()
         last_hash_key = state_key + "_last_hash"
-
         if st.session_state.get(last_hash_key, "") != current_hash:
             st.session_state[last_hash_key] = current_hash
-
-            # Create a status element
-            status = st.empty()
             try:
-                # Show loading message
-                status.markdown("""
-                <div style="display: flex; align-items: center; gap: 0.5rem; padding: 0.5rem;
-                background: #f0f2f6; border-radius: 8px;">
-                <div class="loader"></div>
-                <span>Processing your voice...</span>
-                </div>
-                <style>
-                .loader {
-                    border: 3px solid #f3f3f3;
-                    border-radius: 50%;
-                    border-top: 3px solid #6C63FF;
-                    width: 20px;
-                    height: 20px;
-                    animation: spin 1s linear infinite;
-                }
-                @keyframes spin {
-                    0% { transform: rotate(0deg); }
-                    100% { transform: rotate(360deg); }
-                }
-                </style>
-                """, unsafe_allow_html=True)
-
-                # Process audio
                 audio_input = process_audio(audio_bytes)
                 whisper = load_voice_model()
                 transcribed_text = whisper(audio_input)["text"]
-
-                # Clear loading and show result
-                status.empty()
                 st.info(f"📝 Transcribed: {transcribed_text}")
-
-                # Update session state
+                # Append (or set) new transcription
                 st.session_state[state_key] += (" " + transcribed_text).strip()
                 st.experimental_rerun()
-
             except Exception as e:
-                status.empty()
                 st.error(f"Voice input error: {str(e)}")
-
     return st.session_state[state_key]
 
 ######################################
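One thing worth flagging in the new load_voice_model(): it stores the pipeline in st.session_state.whisper_model but no longer returns it, so whisper = load_voice_model() binds None and whisper(audio_input) raises a TypeError (surfaced through the st.error handler) on the first recording. A minimal sketch of a fix, assuming the callers shown above keep using the return value; since @st.cache_resource already caches the returned object across reruns and sessions, the session_state bookkeeping can be dropped entirely:

@st.cache_resource
def load_voice_model():
    # st.cache_resource memoizes the returned object, so the pipeline
    # is built once per process and reused on every rerun.
    return pipeline("automatic-speech-recognition", model="openai/whisper-base")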
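For context, a sketch of how the transcription helper might be wired into the rest of the app; the label and state key below are hypothetical, not taken from this Space, and it assumes the imports already present in app.py:

# Hypothetical caller: seed a text input with the accumulated transcription.
voice_text = get_voice_transcription("question_input")
question = st.text_input("Ask a question", value=voice_text)

Note also that st.experimental_rerun() was later deprecated in favor of st.rerun(), so the rerun call may need updating depending on the Streamlit version the Space pins.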