Spaces:

VigneshDark
/

background_noise_over_speech

Sleeping

App Files Community

VigneshDark committed on Apr 13

Commit

08e515a

1 Parent(s): 04bc49f

fade option

Browse files

Files changed (1) hide show

helper.py +26 -31

helper.py CHANGED Viewed

@@ -31,8 +31,8 @@ def overlay_audio(speech_audio, noise_audio):
 def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
     """
-    Process speech and noise audio data to create a mixed audio output.
     Args:
         speech_data (numpy.ndarray): Speech audio data
         noise_data (numpy.ndarray): Noise audio data
@@ -40,9 +40,6 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
         noise_sr (int): Noise sample rate
         alpha (float): Speech volume adjustment
         beta (float): Noise volume adjustment
-    Returns:
-        AudioSegment: Processed audio
     """
     # Convert numpy arrays to AudioSegment
     speech_audio = AudioSegment(
@@ -62,35 +59,33 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
     # Get speech duration
     speech_duration = len(speech_audio) / 1000.0  # Convert ms to sec
-    # Cut noise segment
     if len(noise_audio) / 1000.0 <= speech_duration:
         trimmed_noise = noise_audio
     else:
-        start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000  # noqa
-        trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]  # noqa
-    trimmed_noise = trimmed_noise.set_frame_rate(8000)
-    # # Calculate volumes and adjustments
-    # speech_vol = get_audio_volume_db(speech_audio)
-    # noise_vol = get_audio_volume_db(trimmed_noise)
-    # current_snr = speech_vol - noise_vol
-    # adjustment_needed = 10 - current_snr  # target_snr hardcoded to 10
-    # if adjustment_needed > 0:  # Speech too quiet
-    #     speech_adjust = min(adjustment_needed, 2)
-    #     noise_adjust = -min(adjustment_needed / 2, 5)
-    # else:  # Speech too loud
-    #     speech_adjust = max(adjustment_needed, -5)
-    #     noise_adjust = -5 / 2
-    # Apply adjustments
-    adjusted_speech = adjust_volume(speech_audio, alpha)
-    adjusted_noise = adjust_volume(trimmed_noise, beta)
-    final_audio = overlay_audio(adjusted_speech, adjusted_noise)
     return final_audio

 def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
     """
+    Process speech and noise audio data with quality preservation.
     Args:
         speech_data (numpy.ndarray): Speech audio data
         noise_data (numpy.ndarray): Noise audio data
         noise_sr (int): Noise sample rate
         alpha (float): Speech volume adjustment
         beta (float): Noise volume adjustment
     """
     # Convert numpy arrays to AudioSegment
     speech_audio = AudioSegment(
     # Get speech duration
     speech_duration = len(speech_audio) / 1000.0  # Convert ms to sec
+    # Modify crossfade duration based on audio length
+    crossfade_duration = min(5, len(speech_audio) // 4)  # Use 5ms or 1/4 of audio length, whichever is smaller
+    # Cut noise segment with crossfade to avoid clicks
     if len(noise_audio) / 1000.0 <= speech_duration:
         trimmed_noise = noise_audio
     else:
+        start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000
+        trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]
+        # Adjust crossfade duration for short clips
+        trimmed_noise = trimmed_noise.fade_in(crossfade_duration).fade_out(crossfade_duration)
+    # Match sample rates before mixing
+    trimmed_noise = trimmed_noise.set_frame_rate(speech_sr)
+    # Gradual volume adjustment with adjusted crossfade
+    adjusted_speech = speech_audio
+    if alpha != 0:
+        adjusted_speech = adjust_volume(speech_audio, alpha).fade_in(crossfade_duration).fade_out(crossfade_duration)
+    adjusted_noise = trimmed_noise
+    if beta != 0:
+        adjusted_noise = adjust_volume(trimmed_noise, beta).fade_in(crossfade_duration).fade_out(crossfade_duration)
+    # Overlay with crossfade to preserve quality
+    final_audio = adjusted_speech.overlay(adjusted_noise, gain_during_overlay=0)
     return final_audio