HoneyTian committed on
Commit
515f154
·
1 Parent(s): 9b1d5cc
toolbox/torch/utils/data/dataset/denoise_excel_dataset.py CHANGED
@@ -18,11 +18,13 @@ class DenoiseExcelDataset(Dataset):
18
  expected_sample_rate: int,
19
  resample: bool = False,
20
  max_wave_value: float = 1.0,
 
21
  ):
22
  self.excel_file = excel_file
23
  self.expected_sample_rate = expected_sample_rate
24
  self.resample = resample
25
  self.max_wave_value = max_wave_value
 
26
 
27
  self.samples = self.load_samples(excel_file)
28
 
@@ -77,7 +79,7 @@ class DenoiseExcelDataset(Dataset):
77
  mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
78
  speech=speech_wave.numpy(),
79
  noise=noise_wave.numpy(),
80
- snr_db=snr_db,
81
  )
82
  mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
83
  noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
@@ -108,7 +110,7 @@ class DenoiseExcelDataset(Dataset):
108
  return waveform
109
 
110
  @staticmethod
111
- def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float):
112
  l1 = len(speech)
113
  l2 = len(noise)
114
  l = min(l1, l2)
@@ -120,7 +122,7 @@ class DenoiseExcelDataset(Dataset):
120
  speech_power = np.mean(np.square(speech))
121
  noise_power = speech_power / (10 ** (snr_db / 10))
122
 
123
- noise_adjusted = np.sqrt(noise_power) * noise / np.sqrt(np.mean(noise ** 2))
124
 
125
  noisy_signal = speech + noise_adjusted
126
 
 
18
  expected_sample_rate: int,
19
  resample: bool = False,
20
  max_wave_value: float = 1.0,
21
+ eps: float = 1e-8,
22
  ):
23
  self.excel_file = excel_file
24
  self.expected_sample_rate = expected_sample_rate
25
  self.resample = resample
26
  self.max_wave_value = max_wave_value
27
+ self.eps = eps
28
 
29
  self.samples = self.load_samples(excel_file)
30
 
 
79
  mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
80
  speech=speech_wave.numpy(),
81
  noise=noise_wave.numpy(),
82
+ snr_db=snr_db, eps=self.eps,
83
  )
84
  mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
85
  noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
 
110
  return waveform
111
 
112
  @staticmethod
113
+ def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float, eps: float = 1e-8):
114
  l1 = len(speech)
115
  l2 = len(noise)
116
  l = min(l1, l2)
 
122
  speech_power = np.mean(np.square(speech))
123
  noise_power = speech_power / (10 ** (snr_db / 10))
124
 
125
+ noise_adjusted = np.sqrt(noise_power) * noise / (np.sqrt(np.mean(noise ** 2)) + eps)
126
 
127
  noisy_signal = speech + noise_adjusted
128
 
toolbox/torch/utils/data/dataset/denoise_jsonl_dataset.py CHANGED
@@ -22,11 +22,13 @@ class DenoiseJsonlDataset(IterableDataset):
22
  resample: bool = False,
23
  max_wave_value: float = 1.0,
24
  buffer_size: int = 1000,
 
25
  ):
26
  self.jsonl_file = jsonl_file
27
  self.expected_sample_rate = expected_sample_rate
28
  self.resample = resample
29
  self.max_wave_value = max_wave_value
 
30
 
31
  self.buffer_size = buffer_size
32
  self.buffer_samples: List[dict] = list()
@@ -106,7 +108,7 @@ class DenoiseJsonlDataset(IterableDataset):
106
  mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
107
  speech=speech_wave.numpy(),
108
  noise=noise_wave.numpy(),
109
- snr_db=snr_db,
110
  )
111
  mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
112
  noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
@@ -134,7 +136,7 @@ class DenoiseJsonlDataset(IterableDataset):
134
  return waveform
135
 
136
  @staticmethod
137
- def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float):
138
  l1 = len(speech)
139
  l2 = len(noise)
140
  l = min(l1, l2)
@@ -146,7 +148,7 @@ class DenoiseJsonlDataset(IterableDataset):
146
  speech_power = np.mean(np.square(speech))
147
  noise_power = speech_power / (10 ** (snr_db / 10))
148
 
149
- noise_adjusted = np.sqrt(noise_power) * noise / np.sqrt(np.mean(noise ** 2))
150
 
151
  noisy_signal = speech + noise_adjusted
152
 
 
22
  resample: bool = False,
23
  max_wave_value: float = 1.0,
24
  buffer_size: int = 1000,
25
+ eps: float = 1e-8,
26
  ):
27
  self.jsonl_file = jsonl_file
28
  self.expected_sample_rate = expected_sample_rate
29
  self.resample = resample
30
  self.max_wave_value = max_wave_value
31
+ self.eps = eps
32
 
33
  self.buffer_size = buffer_size
34
  self.buffer_samples: List[dict] = list()
 
108
  mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
109
  speech=speech_wave.numpy(),
110
  noise=noise_wave.numpy(),
111
+ snr_db=snr_db, eps=self.eps,
112
  )
113
  mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
114
  noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
 
136
  return waveform
137
 
138
  @staticmethod
139
+ def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float, eps: float = 1e-8):
140
  l1 = len(speech)
141
  l2 = len(noise)
142
  l = min(l1, l2)
 
148
  speech_power = np.mean(np.square(speech))
149
  noise_power = speech_power / (10 ** (snr_db / 10))
150
 
151
+ noise_adjusted = np.sqrt(noise_power) * noise / (np.sqrt(np.mean(noise ** 2)) + eps)
152
 
153
  noisy_signal = speech + noise_adjusted
154