VigneshDark committed on
Commit
08e515a
·
1 Parent(s): 04bc49f

fade option

Browse files
Files changed (1) hide show
  1. helper.py +26 -31
helper.py CHANGED
@@ -31,8 +31,8 @@ def overlay_audio(speech_audio, noise_audio):
31
 
32
  def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
33
  """
34
- Process speech and noise audio data to create a mixed audio output.
35
-
36
  Args:
37
  speech_data (numpy.ndarray): Speech audio data
38
  noise_data (numpy.ndarray): Noise audio data
@@ -40,9 +40,6 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
40
  noise_sr (int): Noise sample rate
41
  alpha (float): Speech volume adjustment
42
  beta (float): Noise volume adjustment
43
-
44
- Returns:
45
- AudioSegment: Processed audio
46
  """
47
  # Convert numpy arrays to AudioSegment
48
  speech_audio = AudioSegment(
@@ -62,35 +59,33 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
62
  # Get speech duration
63
  speech_duration = len(speech_audio) / 1000.0 # Convert ms to sec
64
 
65
- # Cut noise segment
 
 
 
66
  if len(noise_audio) / 1000.0 <= speech_duration:
67
  trimmed_noise = noise_audio
68
  else:
69
- start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000 # noqa
70
- trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)] # noqa
71
-
72
- trimmed_noise = trimmed_noise.set_frame_rate(8000)
73
-
74
- # # Calculate volumes and adjustments
75
- # speech_vol = get_audio_volume_db(speech_audio)
76
- # noise_vol = get_audio_volume_db(trimmed_noise)
77
-
78
- # current_snr = speech_vol - noise_vol
79
- # adjustment_needed = 10 - current_snr # target_snr hardcoded to 10
80
-
81
- # if adjustment_needed > 0: # Speech too quiet
82
- # speech_adjust = min(adjustment_needed, 2)
83
- # noise_adjust = -min(adjustment_needed / 2, 5)
84
- # else: # Speech too loud
85
- # speech_adjust = max(adjustment_needed, -5)
86
- # noise_adjust = -5 / 2
87
-
88
- # Apply adjustments
89
- adjusted_speech = adjust_volume(speech_audio, alpha)
90
- adjusted_noise = adjust_volume(trimmed_noise, beta)
91
-
92
- final_audio = overlay_audio(adjusted_speech, adjusted_noise)
93
-
94
  return final_audio
95
 
96
 
 
31
 
32
  def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
33
  """
34
+ Process speech and noise audio data with quality preservation.
35
+
36
  Args:
37
  speech_data (numpy.ndarray): Speech audio data
38
  noise_data (numpy.ndarray): Noise audio data
 
40
  noise_sr (int): Noise sample rate
41
  alpha (float): Speech volume adjustment
42
  beta (float): Noise volume adjustment
 
 
 
43
  """
44
  # Convert numpy arrays to AudioSegment
45
  speech_audio = AudioSegment(
 
59
  # Get speech duration
60
  speech_duration = len(speech_audio) / 1000.0 # Convert ms to sec
61
 
62
+ # Modify crossfade duration based on audio length
63
+ crossfade_duration = min(5, len(speech_audio) // 4) # Use 5ms or 1/4 of audio length, whichever is smaller
64
+
65
+ # Cut noise segment with crossfade to avoid clicks
66
  if len(noise_audio) / 1000.0 <= speech_duration:
67
  trimmed_noise = noise_audio
68
  else:
69
+ start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000
70
+ trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]
71
+ # Adjust crossfade duration for short clips
72
+ trimmed_noise = trimmed_noise.fade_in(crossfade_duration).fade_out(crossfade_duration)
73
+
74
+ # Match sample rates before mixing
75
+ trimmed_noise = trimmed_noise.set_frame_rate(speech_sr)
76
+
77
+ # Gradual volume adjustment with adjusted crossfade
78
+ adjusted_speech = speech_audio
79
+ if alpha != 0:
80
+ adjusted_speech = adjust_volume(speech_audio, alpha).fade_in(crossfade_duration).fade_out(crossfade_duration)
81
+
82
+ adjusted_noise = trimmed_noise
83
+ if beta != 0:
84
+ adjusted_noise = adjust_volume(trimmed_noise, beta).fade_in(crossfade_duration).fade_out(crossfade_duration)
85
+
86
+ # Overlay with crossfade to preserve quality
87
+ final_audio = adjusted_speech.overlay(adjusted_noise, gain_during_overlay=0)
88
+
 
 
 
 
 
89
  return final_audio
90
 
91