File size: 9,252 Bytes
1b97239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# Standard library imports
import os
from typing import Annotated

# Related third-party imports
import librosa
import soundfile as sf
from librosa.feature import rms
from omegaconf import OmegaConf
from noisereduce import reduce_noise
from MPSENet import MPSENet

# Local imports
from src.utils.utils import Logger


class Denoiser:
    """
    Conditionally denoise audio files with librosa and noisereduce.

    The class loads an audio file, estimates its level as the mean of
    librosa's frame-wise RMS, and only when that level reaches a caller
    supplied threshold runs ``noisereduce.reduce_noise`` and writes the
    result to disk.

    Parameters
    ----------
    config_path : str
        Path to the configuration file that specifies runtime settings.
    output_dir : str, optional
        Default directory to save cleaned audio files. Defaults to ".temp".

    Attributes
    ----------
    config : omegaconf.DictConfig
        Loaded configuration data.
    output_dir : str
        Default directory to save cleaned audio files. It is created on
        construction and used by ``denoise_audio`` whenever the caller does
        not provide a usable directory.
    logger : Logger
        Logger instance for recording messages.
    """

    def __init__(self, config_path: Annotated[str, "Path to the config file"],
                 output_dir: Annotated[str, "Default directory to save cleaned audio files"] = ".temp") -> None:
        """
        Initialize the Denoiser class.

        Parameters
        ----------
        config_path : str
            Path to the configuration file that specifies runtime settings.
        output_dir : str, optional
            Default directory to save cleaned audio files. Defaults to ".temp".
        """
        self.config = OmegaConf.load(config_path)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.logger = Logger(name="DenoiserLogger")

    def _resolve_output_dir(self, output_dir):
        """Return ``output_dir`` if it is non-empty, else the instance default."""
        return output_dir if output_dir else self.output_dir

    def denoise_audio(
            self,
            input_path: Annotated[str, "Path to the noisy audio file"],
            output_dir: Annotated[str, "Directory to save the cleaned audio file"],
            noise_threshold: Annotated[float, "Noise threshold value to decide if denoising is needed"],
            print_output: Annotated[bool, "Whether to log the process to console"] = False,
    ) -> str:
        """
        Denoise an audio file using noisereduce and librosa.

        Parameters
        ----------
        input_path : str
            Path to the noisy input audio file.
        output_dir : str
            Directory to save the cleaned audio file. An empty string or
            ``None`` falls back to the ``output_dir`` given to the constructor.
        noise_threshold : float
            Noise threshold value to decide if denoising is needed. It is
            compared against the mean frame-wise RMS of the loaded signal.
        print_output : bool, optional
            Whether to log the process to the console. Defaults to False.

        Returns
        -------
        str
            Path to the saved audio file if denoising is performed, otherwise the original audio file path.

        Notes
        -----
        The cleaned file is always named ``denoised.wav``, so a later call
        that targets the same directory writes to the same path.

        Examples
        --------
        >>> denoiser = Denoiser("config.yaml")
        >>> input_file = "noisy_audio.wav"
        >>> output_directory = "cleaned_audio"
        >>> noise_thresh = 0.02
        >>> result = denoiser.denoise_audio(input_file, output_directory, noise_thresh)
        >>> print(result)
        cleaned_audio/denoised.wav
        """
        self.logger.log(f"Loading: {input_path}", print_output=print_output)

        # sr=None keeps the file's native sampling rate instead of resampling.
        noisy_waveform, sr = librosa.load(input_path, sr=None)

        # The mean RMS over all frames serves as the "noise level" estimate.
        noise_level = rms(y=noisy_waveform).mean()
        self.logger.log(f"Calculated noise level: {noise_level}", print_output=print_output)

        if noise_level < noise_threshold:
            self.logger.log("Noise level is below the threshold. Skipping denoising.", print_output=print_output)
            return input_path

        self.logger.log("Denoising process started...", print_output=print_output)

        cleaned_waveform = reduce_noise(y=noisy_waveform, sr=sr)

        # An empty/None directory would make os.makedirs / os.path.join fail,
        # so fall back to the directory configured on the instance.
        target_dir = self._resolve_output_dir(output_dir)
        output_path = os.path.join(target_dir, "denoised.wav")

        os.makedirs(target_dir, exist_ok=True)

        sf.write(output_path, cleaned_waveform, sr)

        self.logger.log(f"Denoising completed! Cleaned file: {output_path}", print_output=print_output)

        return output_path


class SpeechEnhancement:
    """
    A class for speech enhancement using the MPSENet model.

    This class provides methods to load audio, apply enhancement using a
    pre-trained MPSENet model, and save the enhanced output.

    Parameters
    ----------
    config_path : str
        Path to the configuration file specifying runtime settings.
    output_dir : str, optional
        Directory to save enhanced audio files. Defaults to ".temp".

    Attributes
    ----------
    config : omegaconf.DictConfig
        Loaded configuration data.
    output_dir : str
        Directory created on construction. ``enhance_audio`` itself writes
        to the ``output_path`` supplied by the caller.
    model_name : str
        Name of the pre-trained model, read from
        ``config.models.mpsenet.model_name``.
    device : str
        Device to run the model (e.g., "cpu" or "cuda"), read from
        ``config.runtime.device``.
    model : MPSENet
        Pre-trained MPSENet model instance.
    """

    def __init__(
            self,
            config_path: Annotated[str, "Path to the config file"],
            output_dir: Annotated[str, "Default directory to save enhanced audio files"] = ".temp"
    ) -> None:
        """
        Initialize the SpeechEnhancement class.

        Parameters
        ----------
        config_path : str
            Path to the configuration file specifying runtime settings.
        output_dir : str, optional
            Directory to save enhanced audio files. Defaults to ".temp".
        """
        self.config = OmegaConf.load(config_path)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        self.model_name = self.config.models.mpsenet.model_name
        self.device = self.config.runtime.device

        self.model = MPSENet.from_pretrained(self.model_name).to(self.device)

    @staticmethod
    def _ensure_parent_dir(file_path: str) -> None:
        """Create the parent directory of ``file_path`` when it has one."""
        parent_dir = os.path.dirname(file_path)
        # A bare filename has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError; in that case there is nothing to create.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    def enhance_audio(
            self,
            input_path: Annotated[str, "Path to the original audio file"],
            output_path: Annotated[str, "Path to save the enhanced audio file"],
            noise_threshold: Annotated[float, "Noise threshold value to decide if enhancement is needed"],
            verbose: Annotated[bool, "Whether to log additional info to console"] = False,
    ) -> str:
        """
        Enhance an audio file using the MPSENet model.

        Parameters
        ----------
        input_path : str
            Path to the original input audio file.
        output_path : str
            Path to save the enhanced audio file. Missing parent directories
            are created; a bare filename is accepted as well.
        noise_threshold : float
            Noise threshold value to decide if enhancement is needed. It is
            compared against the mean frame-wise RMS of the loaded signal.
        verbose : bool, optional
            Whether to log additional info to the console. Defaults to False.

        Returns
        -------
        str
            Path to the enhanced audio file if enhancement is performed, otherwise the original file path.

        Examples
        --------
        >>> enhancer = SpeechEnhancement("config.yaml")
        >>> input_file = "raw_audio.wav"
        >>> output_file = "enhanced_audio.wav"
        >>> noise_thresh = 0.03
        >>> result = enhancer.enhance_audio(input_file, output_file, noise_thresh)
        >>> print(result)
        enhanced_audio.wav
        """
        # First pass: load at the native sampling rate (sr=None) only to
        # estimate the level that decides whether enhancement runs at all.
        raw_waveform, sr_raw = librosa.load(input_path, sr=None)
        noise_level = rms(y=raw_waveform).mean()

        if verbose:
            print(f"[SpeechEnhancement] Detected noise level: {noise_level:.6f}")

        if noise_level < noise_threshold:
            if verbose:
                print(f"[SpeechEnhancement] Noise level < {noise_threshold} → enhancement skipped.")
            return input_path

        # Second pass: reload resampled to the sampling rate the model reports.
        sr_model = self.model.h.sampling_rate
        waveform, sr = librosa.load(input_path, sr=sr_model)

        if verbose:
            print(f"[SpeechEnhancement] Enhancement with MPSENet started using model: {self.model_name}")

        enhanced_waveform, sr_out, _ = self.model(waveform)

        self._ensure_parent_dir(output_path)
        sf.write(output_path, enhanced_waveform, sr_out)

        if verbose:
            print(f"[SpeechEnhancement] Enhancement complete. Saved to: {output_path}")

        return output_path


if __name__ == "__main__":
    # Manual demo: push one sample recording through the denoiser, then hand
    # whichever file that step returned to the speech enhancer.
    config_file = "config/config.yaml"
    sample_audio = ".data/example/noisy/LookOncetoHearTargetSpeechHearingwithNoisyExamples.mp3"
    work_dir = ".temp"
    threshold = 0.005

    # Stage 1: noise reduction. The input path comes back unchanged when the
    # measured level is below the threshold.
    stage_one = Denoiser(config_path=config_file, output_dir=work_dir)
    cleaned_file = stage_one.denoise_audio(
        input_path=sample_audio,
        output_dir=work_dir,
        noise_threshold=threshold,
        print_output=True,
    )
    print(
        "Denoising skipped due to low noise level."
        if cleaned_file == sample_audio
        else f"Denoising completed! Cleaned file saved at: {cleaned_file}"
    )

    # Stage 2: MPSENet enhancement on the output of stage 1.
    stage_two = SpeechEnhancement(config_path=config_file, output_dir=work_dir)
    final_file = stage_two.enhance_audio(
        input_path=cleaned_file,
        output_path=os.path.join(work_dir, "enhanced_audio.wav"),
        noise_threshold=threshold,
        verbose=True,
    )

    if final_file != cleaned_file:
        print(f"Speech enhancement completed! Enhanced file saved at: {final_file}")
    else:
        print("Enhancement skipped due to low noise level.")