jethrowang committed on
Commit
1cf2426
·
verified ·
1 Parent(s): 6fb9356

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -9
app.py CHANGED
@@ -36,7 +36,7 @@ log_mel_spectrogram = T.AmplitudeToDB()
36
  chunk_duration = WINDOW_SIZE
37
  shift_duration = WINDOW_SIZE * 0.875 # Increased overlap compared to first version
38
 
39
- def predict(audio_record, audio_upload, threshold):
40
  """
41
  Predict voice activity in an audio file with detailed processing and visualization.
42
 
@@ -49,10 +49,6 @@ def predict(audio_record, audio_upload, threshold):
49
  """
50
  start_time = time.time()
51
 
52
- audio_input = audio_record if audio_record else audio_upload
53
- if not audio_input:
54
- return "No audio provided!", 0.0, "N/A", None
55
-
56
  try:
57
  # Load and preprocess audio
58
  waveform, orig_sample_rate = torchaudio.load(audio_input)
@@ -167,14 +163,13 @@ with gr.Blocks() as demo:
167
  gr.Image("./img/logo.png", elem_id="logo", height=100)
168
  # Title and Description
169
  gr.Markdown("<h1 style='text-align: center; color: black;'>Voice Activity Detection using SincVAD</h1>")
170
- gr.Markdown("<h3 style='text-align: center; color: black;'>Record or upload audio to predict speech activity and view the probability curve.</h3>")
171
 
172
  # Interface Layout
173
  with gr.Row():
174
  with gr.Column():
175
  # Separate recording and file upload
176
- record_input = gr.Microphone(type="filepath", label="Record Audio")
177
- upload_input = gr.Audio(type="filepath", label="Upload Audio")
178
  threshold_input = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Threshold")
179
  with gr.Column():
180
  prediction_output = gr.Textbox(label="Prediction")
@@ -187,7 +182,7 @@ with gr.Blocks() as demo:
187
  predict_btn = gr.Button("Start Prediction")
188
  predict_btn.click(
189
  predict,
190
- [record_input, upload_input, threshold_input],
191
  [prediction_output, probability_output, time_output, plot_output],
192
  api_name="predict"
193
  )
 
36
  chunk_duration = WINDOW_SIZE
37
  shift_duration = WINDOW_SIZE * 0.875 # Increased overlap compared to first version
38
 
39
+ def predict(audio_input, threshold):
40
  """
41
  Predict voice activity in an audio file with detailed processing and visualization.
42
 
 
49
  """
50
  start_time = time.time()
51
 
 
 
 
 
52
  try:
53
  # Load and preprocess audio
54
  waveform, orig_sample_rate = torchaudio.load(audio_input)
 
163
  gr.Image("./img/logo.png", elem_id="logo", height=100)
164
  # Title and Description
165
  gr.Markdown("<h1 style='text-align: center; color: black;'>Voice Activity Detection using SincVAD</h1>")
166
+ gr.Markdown("<h3 style='text-align: center; color: black;'>Upload or record audio to predict speech activity and view the probability curve.</h3>")
167
 
168
  # Interface Layout
169
  with gr.Row():
170
  with gr.Column():
171
  # Separate recording and file upload
172
+ audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
 
173
  threshold_input = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Threshold")
174
  with gr.Column():
175
  prediction_output = gr.Textbox(label="Prediction")
 
182
  predict_btn = gr.Button("Start Prediction")
183
  predict_btn.click(
184
  predict,
185
+ [audio_input, threshold_input],
186
  [prediction_output, probability_output, time_output, plot_output],
187
  api_name="predict"
188
  )