iisadia committed · Commit 2829a73 · verified · 1 Parent(s): 8f95ee2

Update app.py

Files changed (1):
  1. app.py +7 -45
app.py CHANGED
@@ -23,8 +23,8 @@ from transformers import pipeline
 
 @st.cache_resource
 def load_voice_model():
-    # Loading the Whisper model (which automatically detects both English and Urdu)
-    return pipeline("automatic-speech-recognition", model="openai/whisper-base")
+    if 'whisper_model' not in st.session_state:
+        st.session_state.whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-base")
 
 def process_audio(audio_bytes):
     waveform, sample_rate = torchaudio.load(BytesIO(audio_bytes))
@@ -35,71 +35,33 @@ def process_audio(audio_bytes):
     waveform = resampler(waveform)
     return {"raw": waveform.numpy().squeeze(), "sampling_rate": 16000}
 
-
 def get_voice_transcription(state_key):
     """Display audio recorder for a given key.
     If new audio is recorded, transcribe it and update the session state.
     """
     if state_key not in st.session_state:
         st.session_state[state_key] = ""
-
     # Use a unique key for the recorder widget
     audio_bytes = audio_recorder(key=state_key + "_audio",
-                                 pause_threshold=0.8,
-                                 text="Speak to type",
-                                 recording_color="#e8b62c",
-                                 neutral_color="#6aa36f")
-
+                                 pause_threshold=0.8,
+                                 text="Speak to type",
+                                 recording_color="#e8b62c",
+                                 neutral_color="#6aa36f")
     if audio_bytes:
         current_hash = hashlib.md5(audio_bytes).hexdigest()
         last_hash_key = state_key + "_last_hash"
-
         if st.session_state.get(last_hash_key, "") != current_hash:
             st.session_state[last_hash_key] = current_hash
-
-            # Create a status element
-            status = st.empty()
             try:
-                # Show loading message
-                status.markdown("""
-                    <div style="display: flex; align-items: center; gap: 0.5rem; padding: 0.5rem;
-                                background: #f0f2f6; border-radius: 8px;">
-                        <div class="loader"></div>
-                        <span>Processing your voice...</span>
-                    </div>
-                    <style>
-                        .loader {
-                            border: 3px solid #f3f3f3;
-                            border-radius: 50%;
-                            border-top: 3px solid #6C63FF;
-                            width: 20px;
-                            height: 20px;
-                            animation: spin 1s linear infinite;
-                        }
-                        @keyframes spin {
-                            0% { transform: rotate(0deg); }
-                            100% { transform: rotate(360deg); }
-                        }
-                    </style>
-                """, unsafe_allow_html=True)
-
-                # Process audio
                 audio_input = process_audio(audio_bytes)
                 whisper = load_voice_model()
                 transcribed_text = whisper(audio_input)["text"]
-
-                # Clear loading and show result
-                status.empty()
                 st.info(f"📝 Transcribed: {transcribed_text}")
+                # Append (or set) new transcription
                 st.session_state[state_key] += (" " + transcribed_text).strip()
                 st.experimental_rerun()
-
             except Exception as e:
-                status.empty()
                 st.error(f"Voice input error: {str(e)}")
-
         return st.session_state[state_key]
 
 ######################################
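One thing to note in this diff: the new load_voice_model() stores the pipeline in st.session_state but no longer returns it, while the unchanged call site still does whisper = load_voice_model() followed by whisper(audio_input). As committed, whisper binds None and the next recording would fail with a TypeError. Below is a minimal sketch of a loader that keeps the commit's session-state caching while still returning the model; note that @st.cache_resource alone already memoizes the pipeline across reruns, so the session_state check is redundant rather than required.

    import streamlit as st
    from transformers import pipeline

    @st.cache_resource
    def load_voice_model():
        # Mirrors the commit's session_state caching, but returns the pipeline
        # so existing call sites (whisper = load_voice_model()) keep working.
        if "whisper_model" not in st.session_state:
            st.session_state.whisper_model = pipeline(
                "automatic-speech-recognition", model="openai/whisper-base"
            )
        return st.session_state.whisper_model

Separately, st.experimental_rerun() is deprecated in current Streamlit releases in favor of st.rerun(), so the rerun call in get_voice_transcription may need renaming on newer versions.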
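For reference, get_voice_transcription() returns the accumulated transcription for its state_key, so it can sit next to any text widget. A hypothetical call site (not part of this commit; the label and state key are illustrative):

    # Hypothetical usage sketch, assuming the helpers above are in scope.
    spoken = get_voice_transcription("question_text")
    st.text_area("Your question", value=spoken)

One caveat visible in the diff: (" " + transcribed_text).strip() strips the leading separator space, so successive recordings are concatenated with no space between them.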