HoneyTian committed on
Commit
b8db969
·
1 Parent(s): f33a053
examples/spectrum_dfnet_aishell/step_3_evaluation.py CHANGED
@@ -105,10 +105,10 @@ def enhance(mix_spec_complex: torch.Tensor,
105
  mask_speech = speech_irm_prediction
106
  mask_noise = 1.0 - speech_irm_prediction
107
 
108
- print(f"mix_spec_complex: {mix_spec_complex.shape}")
109
- print(f"mask_noise: {mask_noise.shape}")
110
 
111
- # speech_spec = mix_spec_complex * mask_speech
112
  noise_spec = mix_spec_complex * mask_noise
113
 
114
  speech_wave = istft.forward(speech_spec_prediction)
@@ -251,14 +251,14 @@ def main():
251
 
252
  # mix_spec_complex shape: [batch_size, freq_dim (257), time_steps, 2]
253
  # speech_irm_prediction shape: [batch_size, freq_dim (256), time_steps]
254
- batch_size, _, time_steps = speech_irm_prediction.shape
255
- speech_irm_prediction = torch.concat(
256
- [
257
- speech_irm_prediction,
258
- 0.5*torch.ones(size=(batch_size, 1, time_steps), dtype=speech_irm_prediction.dtype).to(device)
259
- ],
260
- dim=1,
261
- )
262
  # speech_irm_prediction shape: [batch_size, freq_dim (257), time_steps]
263
  speech_wave_enhanced, noise_wave_enhanced = enhance(mix_spec_complex, speech_spec_prediction, speech_irm_prediction)
264
  save_audios(noise_wave, speech_wave, mix_wave, speech_wave_enhanced, noise_wave_enhanced, args.evaluation_audio_dir)
 
105
  mask_speech = speech_irm_prediction
106
  mask_noise = 1.0 - speech_irm_prediction
107
 
108
+ # print(f"mix_spec_complex: {mix_spec_complex.shape}")
109
+ # print(f"mask_noise: {mask_noise.shape}")
110
 
111
+ speech_spec = mix_spec_complex * mask_speech
112
  noise_spec = mix_spec_complex * mask_noise
113
 
114
  speech_wave = istft.forward(speech_spec_prediction)
 
251
 
252
  # mix_spec_complex shape: [batch_size, freq_dim (257), time_steps, 2]
253
  # speech_irm_prediction shape: [batch_size, freq_dim (256), time_steps]
254
+ # batch_size, _, time_steps = speech_irm_prediction.shape
255
+ # speech_irm_prediction = torch.concat(
256
+ # [
257
+ # speech_irm_prediction,
258
+ # 0.5*torch.ones(size=(batch_size, 1, time_steps), dtype=speech_irm_prediction.dtype).to(device)
259
+ # ],
260
+ # dim=1,
261
+ # )
262
  # speech_irm_prediction shape: [batch_size, freq_dim (257), time_steps]
263
  speech_wave_enhanced, noise_wave_enhanced = enhance(mix_spec_complex, speech_spec_prediction, speech_irm_prediction)
264
  save_audios(noise_wave, speech_wave, mix_wave, speech_wave_enhanced, noise_wave_enhanced, args.evaluation_audio_dir)