Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ log_mel_spectrogram = T.AmplitudeToDB()
|
|
36 |
chunk_duration = WINDOW_SIZE
|
37 |
shift_duration = WINDOW_SIZE * 0.875 # Increased overlap compared to first version
|
38 |
|
39 |
-
def predict(audio_record, audio_upload, threshold):
|
40 |
"""
|
41 |
Predict voice activity in an audio file with detailed processing and visualization.
|
42 |
|
@@ -49,10 +49,6 @@ def predict(audio_record, audio_upload, threshold):
|
|
49 |
"""
|
50 |
start_time = time.time()
|
51 |
|
52 |
-
audio_input = audio_record if audio_record else audio_upload
|
53 |
-
if not audio_input:
|
54 |
-
return "No audio provided!", 0.0, "N/A", None
|
55 |
-
|
56 |
try:
|
57 |
# Load and preprocess audio
|
58 |
waveform, orig_sample_rate = torchaudio.load(audio_input)
|
@@ -167,14 +163,13 @@ with gr.Blocks() as demo:
|
|
167 |
gr.Image("./img/logo.png", elem_id="logo", height=100)
|
168 |
# Title and Description
|
169 |
gr.Markdown("<h1 style='text-align: center; color: black;'>Voice Activity Detection using SincVAD</h1>")
|
170 |
-
gr.Markdown("<h3 style='text-align: center; color: black;'>
|
171 |
|
172 |
# Interface Layout
|
173 |
with gr.Row():
|
174 |
with gr.Column():
|
175 |
# Separate recording and file upload
|
176 |
-
|
177 |
-
upload_input = gr.Audio(type="filepath", label="Upload Audio")
|
178 |
threshold_input = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Threshold")
|
179 |
with gr.Column():
|
180 |
prediction_output = gr.Textbox(label="Prediction")
|
@@ -187,7 +182,7 @@ with gr.Blocks() as demo:
|
|
187 |
predict_btn = gr.Button("Start Prediction")
|
188 |
predict_btn.click(
|
189 |
predict,
|
190 |
-
[
|
191 |
[prediction_output, probability_output, time_output, plot_output],
|
192 |
api_name="predict"
|
193 |
)
|
|
|
36 |
chunk_duration = WINDOW_SIZE
|
37 |
shift_duration = WINDOW_SIZE * 0.875 # Increased overlap compared to first version
|
38 |
|
39 |
+
def predict(audio_input, threshold):
|
40 |
"""
|
41 |
Predict voice activity in an audio file with detailed processing and visualization.
|
42 |
|
|
|
49 |
"""
|
50 |
start_time = time.time()
|
51 |
|
|
|
|
|
|
|
|
|
52 |
try:
|
53 |
# Load and preprocess audio
|
54 |
waveform, orig_sample_rate = torchaudio.load(audio_input)
|
|
|
163 |
gr.Image("./img/logo.png", elem_id="logo", height=100)
|
164 |
# Title and Description
|
165 |
gr.Markdown("<h1 style='text-align: center; color: black;'>Voice Activity Detection using SincVAD</h1>")
|
166 |
+
gr.Markdown("<h3 style='text-align: center; color: black;'>Upload or record audio to predict speech activity and view the probability curve.</h3>")
|
167 |
|
168 |
# Interface Layout
|
169 |
with gr.Row():
|
170 |
with gr.Column():
|
171 |
# Separate recording and file upload
|
172 |
+
audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
|
|
|
173 |
threshold_input = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Threshold")
|
174 |
with gr.Column():
|
175 |
prediction_output = gr.Textbox(label="Prediction")
|
|
|
182 |
predict_btn = gr.Button("Start Prediction")
|
183 |
predict_btn.click(
|
184 |
predict,
|
185 |
+
[audio_input, threshold_input],
|
186 |
[prediction_output, probability_output, time_output, plot_output],
|
187 |
api_name="predict"
|
188 |
)
|