Spaces:

VigneshDark
/

background_noise_over_speech

Sleeping

VigneshDark committed on Mar 12

Commit

82f19d8

1 Parent(s): b36d167

fix: typo

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,34 +1,35 @@
-import gradio as gr
 from helper import process_audio
-import os
-import numpy as np
 # Sample audio file paths
 SAMPLE_SPEECH = "anushka.wav"
 SAMPLE_NOISE = "traffic.wav"
 def process_audio_files(speech_file, noise_file, alpha, beta):
     """
     Process the audio files and return the mixed output
     Args:
         speech_file (tuple): Speech audio (sample_rate, data)
         noise_file (tuple): Noise audio (sample_rate, data)
         alpha (float): First slider value (-30 to +30)
         beta (float): Second slider value (-30 to +30)
     Returns:
         tuple: (sample_rate, processed_audio_data)
     """
     speech_sr, speech_data = speech_file
     noise_sr, noise_data = noise_file
     # Process the audio using the helper function
-    output_audio = process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta)
     # Convert AudioSegment to numpy array
     samples = np.array(output_audio.get_array_of_samples())
     return (output_audio.frame_rate, samples)

+import gradio as gr  # type: ignore
 from helper import process_audio
+import numpy as np  # type: ignore
 # Sample audio file paths
 SAMPLE_SPEECH = "anushka.wav"
 SAMPLE_NOISE = "traffic.wav"
 def process_audio_files(speech_file, noise_file, alpha, beta):
     """
     Process the audio files and return the mixed output
     Args:
         speech_file (tuple): Speech audio (sample_rate, data)
         noise_file (tuple): Noise audio (sample_rate, data)
         alpha (float): First slider value (-30 to +30)
         beta (float): Second slider value (-30 to +30)
     Returns:
         tuple: (sample_rate, processed_audio_data)
     """
     speech_sr, speech_data = speech_file
     noise_sr, noise_data = noise_file
     # Process the audio using the helper function
+    output_audio = process_audio(speech_data, noise_data, speech_sr, noise_sr,
+                                 alpha, beta)
     # Convert AudioSegment to numpy array
     samples = np.array(output_audio.get_array_of_samples())
     return (output_audio.frame_rate, samples)

helper.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import random
-from pydub import AudioSegment
 # from pydub.effects import normalize
-import numpy as np
 def get_audio_volume_db(audio):
-    """Estimate the volume in dBFS (decibels relative to full scale) using PyDub."""
-    return audio.dBFS if audio.dBFS != float('-inf') else -50.0  # Default to -50 dB for silence
 def adjust_volume(audio, volume_change_db):
@@ -46,16 +46,16 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
     """
     # Convert numpy arrays to AudioSegment
     speech_audio = AudioSegment(
-        speech_data.tobytes(),
         frame_rate=speech_sr,
-        sample_width=speech_data.dtype.itemsize,
         channels=1
     )
     noise_audio = AudioSegment(
-        noise_data.tobytes(),
         frame_rate=noise_sr,
-        sample_width=noise_data.dtype.itemsize,
         channels=1
     )
@@ -66,8 +66,8 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
     if len(noise_audio) / 1000.0 <= speech_duration:
         trimmed_noise = noise_audio
     else:
-        start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000
-        trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]
     trimmed_noise = trimmed_noise.set_frame_rate(8000)

 import random
+from pydub import AudioSegment  # type: ignore
 # from pydub.effects import normalize
+# import numpy as np  # type: ignore
 def get_audio_volume_db(audio):
+    """Estimate the volume in dBFS (decibels relative to full scale) using PyDub."""  # noqa
+    return audio.dBFS if audio.dBFS != float('-inf') else -50.0  # Default to -50 dB for silence  # noqa
 def adjust_volume(audio, volume_change_db):
     """
     # Convert numpy arrays to AudioSegment
     speech_audio = AudioSegment(
+        speech_data.tobytes(),
         frame_rate=speech_sr,
+        sample_width=speech_data.dtype.itemsize,
         channels=1
     )
     noise_audio = AudioSegment(
+        noise_data.tobytes(),
         frame_rate=noise_sr,
+        sample_width=noise_data.dtype.itemsize,
         channels=1
     )
     if len(noise_audio) / 1000.0 <= speech_duration:
         trimmed_noise = noise_audio
     else:
+        start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000  # noqa
+        trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]  # noqa
     trimmed_noise = trimmed_noise.set_frame_rate(8000)