Spaces:

bunyaminergen
/

CallyticsDemo

Running

File size: 9,252 Bytes

1b97239

# Standard library imports
import os
from typing import Annotated

# Related third-party imports
import librosa
import soundfile as sf
from librosa.feature import rms
from omegaconf import OmegaConf
from noisereduce import reduce_noise
from MPSENet import MPSENet

# Local imports
from src.utils.utils import Logger


class Denoiser:
    """
    A class to handle audio denoising using librosa and noisereduce.

    This class provides methods to load noisy audio, apply denoising, and
    save the cleaned output to disk.

    Parameters
    ----------
    config_path : str
        Path to the configuration file that specifies runtime settings.
    output_dir : str, optional
        Directory to save cleaned audio files. Defaults to ".temp".

    Attributes
    ----------
    config : omegaconf.DictConfig
        Loaded configuration data.
    output_dir : str
        Directory to save cleaned audio files.
    logger : Logger
        Logger instance for recording messages.
    """

    def __init__(self, config_path: Annotated[str, "Path to the config file"],
                 output_dir: Annotated[str, "Default directory to save cleaned audio files"] = ".temp") -> None:
        """
        Initialize the Denoiser class.

        Parameters
        ----------
        config_path : str
            Path to the configuration file that specifies runtime settings.
        output_dir : str, optional
            Default directory to save cleaned audio files. Defaults to ".temp".
        """
        self.config = OmegaConf.load(config_path)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.logger = Logger(name="DenoiserLogger")

    def denoise_audio(
            self,
            input_path: Annotated[str, "Path to the noisy audio file"],
            output_dir: Annotated[str, "Directory to save the cleaned audio file"],
            noise_threshold: Annotated[float, "Noise threshold value to decide if denoising is needed"],
            print_output: Annotated[bool, "Whether to log the process to console"] = False,
    ) -> str:
        """
        Denoise an audio file using noisereduce and librosa.

        Parameters
        ----------
        input_path : str
            Path to the noisy input audio file.
        output_dir : str
            Directory to save the cleaned audio file.
        noise_threshold : float
            Noise threshold value to decide if denoising is needed.
        print_output : bool, optional
            Whether to log the process to the console. Defaults to False.

        Returns
        -------
        str
            Path to the saved audio file if denoising is performed, otherwise the original audio file path.

        Examples
        --------
        >>> denoise = Denoiser("config.yaml")
        >>> input_file = "noisy_audio.wav"
        >>> output_directory = "cleaned_audio"
        >>> noise_thresh = 0.02
        >>> result = denoiser.denoise_audio(input_file, output_directory, noise_thresh)
        >>> print(result)
        cleaned_audio/denoised.wav
        """
        self.logger.log(f"Loading: {input_path}", print_output=print_output)

        noisy_waveform, sr = librosa.load(input_path, sr=None)

        noise_level = rms(y=noisy_waveform).mean()
        self.logger.log(f"Calculated noise level: {noise_level}", print_output=print_output)

        if noise_level < noise_threshold:
            self.logger.log("Noise level is below the threshold. Skipping denoising.", print_output=print_output)
            return input_path

        self.logger.log("Denoising process started...", print_output=print_output)

        cleaned_waveform = reduce_noise(y=noisy_waveform, sr=sr)

        output_path = os.path.join(output_dir, "denoised.wav")

        os.makedirs(output_dir, exist_ok=True)

        sf.write(output_path, cleaned_waveform, sr)

        self.logger.log(f"Denoising completed! Cleaned file: {output_path}", print_output=print_output)

        return output_path


class SpeechEnhancement:
    """
    A class for speech enhancement using the MPSENet model.

    This class provides methods to load audio, apply enhancement using a
    pre-trained MPSENet model, and save the enhanced output.

    Parameters
    ----------
    config_path : str
        Path to the configuration file specifying runtime settings.
    output_dir : str, optional
        Directory to save enhanced audio files. Defaults to ".temp".

    Attributes
    ----------
    config : omegaconf.DictConfig
        Loaded configuration data.
    output_dir : str
        Directory to save enhanced audio files.
    model_name : str
        Name of the pre-trained model.
    device : str
        Device to run the model (e.g., "cpu" or "cuda").
    model : MPSENet
        Pre-trained MPSENet model instance.
    """

    def __init__(
            self,
            config_path: Annotated[str, "Path to the config file"],
            output_dir: Annotated[str, "Default directory to save enhanced audio files"] = ".temp"
    ) -> None:
        """
        Initialize the SpeechEnhancement class.

        Parameters
        ----------
        config_path : str
            Path to the configuration file specifying runtime settings.
        output_dir : str, optional
            Directory to save enhanced audio files. Defaults to ".temp".
        """
        self.config = OmegaConf.load(config_path)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        self.model_name = self.config.models.mpsenet.model_name
        self.device = self.config.runtime.device

        self.model = MPSENet.from_pretrained(self.model_name).to(self.device)

    def enhance_audio(
            self,
            input_path: Annotated[str, "Path to the original audio file"],
            output_path: Annotated[str, "Path to save the enhanced audio file"],
            noise_threshold: Annotated[float, "Noise threshold value to decide if enhancement is needed"],
            verbose: Annotated[bool, "Whether to log additional info to console"] = False,
    ) -> str:
        """
        Enhance an audio file using the MPSENet model.

        Parameters
        ----------
        input_path : str
            Path to the original input audio file.
        output_path : str
            Path to save the enhanced audio file.
        noise_threshold : float
            Noise threshold value to decide if enhancement is needed.
        verbose : bool, optional
            Whether to log additional info to the console. Defaults to False.

        Returns
        -------
        str
            Path to the enhanced audio file if enhancement is performed, otherwise the original file path.

        Examples
        --------
        >>> enhancer = SpeechEnhancement("config.yaml")
        >>> input_file = "raw_audio.wav"
        >>> output_file = "enhanced_audio.wav"
        >>> noise_thresh = 0.03
        >>> result = enhancer.enhance_audio(input_file, output_file, noise_thresh)
        >>> print(result)
        enhanced_audio.wav
        """
        raw_waveform, sr_raw = librosa.load(input_path, sr=None)
        noise_level = rms(y=raw_waveform).mean()

        if verbose:
            print(f"[SpeechEnhancement] Detected noise level: {noise_level:.6f}")

        if noise_level < noise_threshold:
            if verbose:
                print(f"[SpeechEnhancement] Noise level < {noise_threshold} → enhancement skipped.")
            return input_path

        sr_model = self.model.h.sampling_rate
        waveform, sr = librosa.load(input_path, sr=sr_model)

        if verbose:
            print(f"[SpeechEnhancement] Enhancement with MPSENet started using model: {self.model_name}")

        enhanced_waveform, sr_out, _ = self.model(waveform)

        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        sf.write(output_path, enhanced_waveform, sr_out)

        if verbose:
            print(f"[SpeechEnhancement] Enhancement complete. Saved to: {output_path}")

        return output_path


if __name__ == "__main__":

    test_config_path = "config/config.yaml"
    noisy_audio_file = ".data/example/noisy/LookOncetoHearTargetSpeechHearingwithNoisyExamples.mp3"
    temp_dir = ".temp"

    denoiser = Denoiser(config_path=test_config_path, output_dir=temp_dir)
    denoised_path = denoiser.denoise_audio(
        input_path=noisy_audio_file,
        output_dir=temp_dir,
        noise_threshold=0.005,
        print_output=True
    )
    if denoised_path == noisy_audio_file:
        print("Denoising skipped due to low noise level.")
    else:
        print(f"Denoising completed! Cleaned file saved at: {denoised_path}")

    speech_enhancer = SpeechEnhancement(config_path=test_config_path, output_dir=temp_dir)
    enhanced_audio_path = os.path.join(temp_dir, "enhanced_audio.wav")

    result_path = speech_enhancer.enhance_audio(
        input_path=denoised_path,
        output_path=enhanced_audio_path,
        noise_threshold=0.005,
        verbose=True
    )

    if result_path == denoised_path:
        print("Enhancement skipped due to low noise level.")
    else:
        print(f"Speech enhancement completed! Enhanced file saved at: {result_path}")