Spaces:
Running
Running
update
Browse files
toolbox/torch/utils/data/dataset/denoise_excel_dataset.py
CHANGED
@@ -18,11 +18,13 @@ class DenoiseExcelDataset(Dataset):
|
|
18 |
expected_sample_rate: int,
|
19 |
resample: bool = False,
|
20 |
max_wave_value: float = 1.0,
|
|
|
21 |
):
|
22 |
self.excel_file = excel_file
|
23 |
self.expected_sample_rate = expected_sample_rate
|
24 |
self.resample = resample
|
25 |
self.max_wave_value = max_wave_value
|
|
|
26 |
|
27 |
self.samples = self.load_samples(excel_file)
|
28 |
|
@@ -77,7 +79,7 @@ class DenoiseExcelDataset(Dataset):
|
|
77 |
mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
|
78 |
speech=speech_wave.numpy(),
|
79 |
noise=noise_wave.numpy(),
|
80 |
-
snr_db=snr_db,
|
81 |
)
|
82 |
mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
|
83 |
noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
|
@@ -108,7 +110,7 @@ class DenoiseExcelDataset(Dataset):
|
|
108 |
return waveform
|
109 |
|
110 |
@staticmethod
|
111 |
-
def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float):
|
112 |
l1 = len(speech)
|
113 |
l2 = len(noise)
|
114 |
l = min(l1, l2)
|
@@ -120,7 +122,7 @@ class DenoiseExcelDataset(Dataset):
|
|
120 |
speech_power = np.mean(np.square(speech))
|
121 |
noise_power = speech_power / (10 ** (snr_db / 10))
|
122 |
|
123 |
-
noise_adjusted = np.sqrt(noise_power) * noise / np.sqrt(np.mean(noise ** 2))
|
124 |
|
125 |
noisy_signal = speech + noise_adjusted
|
126 |
|
|
|
18 |
expected_sample_rate: int,
|
19 |
resample: bool = False,
|
20 |
max_wave_value: float = 1.0,
|
21 |
+
eps: float = 1e-8,
|
22 |
):
|
23 |
self.excel_file = excel_file
|
24 |
self.expected_sample_rate = expected_sample_rate
|
25 |
self.resample = resample
|
26 |
self.max_wave_value = max_wave_value
|
27 |
+
self.eps = eps
|
28 |
|
29 |
self.samples = self.load_samples(excel_file)
|
30 |
|
|
|
79 |
mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
|
80 |
speech=speech_wave.numpy(),
|
81 |
noise=noise_wave.numpy(),
|
82 |
+
snr_db=snr_db, eps=self.eps,
|
83 |
)
|
84 |
mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
|
85 |
noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
|
|
|
110 |
return waveform
|
111 |
|
112 |
@staticmethod
|
113 |
+
def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float, eps: float = 1e-8):
|
114 |
l1 = len(speech)
|
115 |
l2 = len(noise)
|
116 |
l = min(l1, l2)
|
|
|
122 |
speech_power = np.mean(np.square(speech))
|
123 |
noise_power = speech_power / (10 ** (snr_db / 10))
|
124 |
|
125 |
+
noise_adjusted = np.sqrt(noise_power) * noise / (np.sqrt(np.mean(noise ** 2)) + eps)
|
126 |
|
127 |
noisy_signal = speech + noise_adjusted
|
128 |
|
toolbox/torch/utils/data/dataset/denoise_jsonl_dataset.py
CHANGED
@@ -22,11 +22,13 @@ class DenoiseJsonlDataset(IterableDataset):
|
|
22 |
resample: bool = False,
|
23 |
max_wave_value: float = 1.0,
|
24 |
buffer_size: int = 1000,
|
|
|
25 |
):
|
26 |
self.jsonl_file = jsonl_file
|
27 |
self.expected_sample_rate = expected_sample_rate
|
28 |
self.resample = resample
|
29 |
self.max_wave_value = max_wave_value
|
|
|
30 |
|
31 |
self.buffer_size = buffer_size
|
32 |
self.buffer_samples: List[dict] = list()
|
@@ -106,7 +108,7 @@ class DenoiseJsonlDataset(IterableDataset):
|
|
106 |
mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
|
107 |
speech=speech_wave.numpy(),
|
108 |
noise=noise_wave.numpy(),
|
109 |
-
snr_db=snr_db,
|
110 |
)
|
111 |
mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
|
112 |
noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
|
@@ -134,7 +136,7 @@ class DenoiseJsonlDataset(IterableDataset):
|
|
134 |
return waveform
|
135 |
|
136 |
@staticmethod
|
137 |
-
def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float):
|
138 |
l1 = len(speech)
|
139 |
l2 = len(noise)
|
140 |
l = min(l1, l2)
|
@@ -146,7 +148,7 @@ class DenoiseJsonlDataset(IterableDataset):
|
|
146 |
speech_power = np.mean(np.square(speech))
|
147 |
noise_power = speech_power / (10 ** (snr_db / 10))
|
148 |
|
149 |
-
noise_adjusted = np.sqrt(noise_power) * noise / np.sqrt(np.mean(noise ** 2))
|
150 |
|
151 |
noisy_signal = speech + noise_adjusted
|
152 |
|
|
|
22 |
resample: bool = False,
|
23 |
max_wave_value: float = 1.0,
|
24 |
buffer_size: int = 1000,
|
25 |
+
eps: float = 1e-8,
|
26 |
):
|
27 |
self.jsonl_file = jsonl_file
|
28 |
self.expected_sample_rate = expected_sample_rate
|
29 |
self.resample = resample
|
30 |
self.max_wave_value = max_wave_value
|
31 |
+
self.eps = eps
|
32 |
|
33 |
self.buffer_size = buffer_size
|
34 |
self.buffer_samples: List[dict] = list()
|
|
|
108 |
mix_wave, noise_wave_adjusted = self.mix_speech_and_noise(
|
109 |
speech=speech_wave.numpy(),
|
110 |
noise=noise_wave.numpy(),
|
111 |
+
snr_db=snr_db, eps=self.eps,
|
112 |
)
|
113 |
mix_wave = torch.tensor(mix_wave, dtype=torch.float32)
|
114 |
noise_wave_adjusted = torch.tensor(noise_wave_adjusted, dtype=torch.float32)
|
|
|
136 |
return waveform
|
137 |
|
138 |
@staticmethod
|
139 |
+
def mix_speech_and_noise(speech: np.ndarray, noise: np.ndarray, snr_db: float, eps: float = 1e-8):
|
140 |
l1 = len(speech)
|
141 |
l2 = len(noise)
|
142 |
l = min(l1, l2)
|
|
|
148 |
speech_power = np.mean(np.square(speech))
|
149 |
noise_power = speech_power / (10 ** (snr_db / 10))
|
150 |
|
151 |
+
noise_adjusted = np.sqrt(noise_power) * noise / (np.sqrt(np.mean(noise ** 2)) + eps)
|
152 |
|
153 |
noisy_signal = speech + noise_adjusted
|
154 |
|