Spaces:

jethrowang
/

SincQDR-VAD_Demo

Sleeping

App Files Community

jethrowang committed on Mar 25

Commit

1cf2426

verified ·

1 Parent(s): 6fb9356

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -9

app.py CHANGED Viewed

@@ -36,7 +36,7 @@ log_mel_spectrogram = T.AmplitudeToDB()
 chunk_duration = WINDOW_SIZE
 shift_duration = WINDOW_SIZE * 0.875  # Increased overlap compared to first version
-def predict(audio_record, audio_upload, threshold):
     """
     Predict voice activity in an audio file with detailed processing and visualization.
@@ -49,10 +49,6 @@ def predict(audio_record, audio_upload, threshold):
     """
     start_time = time.time()
-    audio_input = audio_record if audio_record else audio_upload
-    if not audio_input:
-        return "No audio provided!", 0.0, "N/A", None
     try:
         # Load and preprocess audio
         waveform, orig_sample_rate = torchaudio.load(audio_input)
@@ -167,14 +163,13 @@ with gr.Blocks() as demo:
     gr.Image("./img/logo.png", elem_id="logo", height=100)
     # Title and Description
     gr.Markdown("<h1 style='text-align: center; color: black;'>Voice Activity Detection using SincVAD</h1>")
-    gr.Markdown("<h3 style='text-align: center; color: black;'>Record or upload audio to predict speech activity and view the probability curve.</h3>")
     # Interface Layout
     with gr.Row():
         with gr.Column():
             # Separate recording and file upload
-            record_input = gr.Microphone(type="filepath", label="Record Audio")
-            upload_input = gr.Audio(type="filepath", label="Upload Audio")
             threshold_input = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Threshold")
         with gr.Column():
             prediction_output = gr.Textbox(label="Prediction")
@@ -187,7 +182,7 @@ with gr.Blocks() as demo:
     predict_btn = gr.Button("Start Prediction")
     predict_btn.click(
         predict,
-        [record_input, upload_input, threshold_input],
         [prediction_output, probability_output, time_output, plot_output],
         api_name="predict"
     )

 chunk_duration = WINDOW_SIZE
 shift_duration = WINDOW_SIZE * 0.875  # Increased overlap compared to first version
+def predict(audio_input, threshold):
     """
     Predict voice activity in an audio file with detailed processing and visualization.
     """
     start_time = time.time()
     try:
         # Load and preprocess audio
         waveform, orig_sample_rate = torchaudio.load(audio_input)
     gr.Image("./img/logo.png", elem_id="logo", height=100)
     # Title and Description
     gr.Markdown("<h1 style='text-align: center; color: black;'>Voice Activity Detection using SincVAD</h1>")
+    gr.Markdown("<h3 style='text-align: center; color: black;'>Upload or record audio to predict speech activity and view the probability curve.</h3>")
     # Interface Layout
     with gr.Row():
         with gr.Column():
             # Single audio component for both upload and recording
+            audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
             threshold_input = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Threshold")
         with gr.Column():
             prediction_output = gr.Textbox(label="Prediction")
     predict_btn = gr.Button("Start Prediction")
     predict_btn.click(
         predict,
+        [audio_input, threshold_input],
         [prediction_output, probability_output, time_output, plot_output],
         api_name="predict"
     )