Spaces:

redfr
/

VIST-UI

Running

File size: 3,457 Bytes

2b7fd6e

import io
import os
import subprocess
import tempfile
import wave

import librosa
import numpy as np
import soundfile
from scipy.io import wavfile
from scipy.io.wavfile import read


class VEX:
    """Load PCM WAV audio and split it into vocals and accompaniment.

    Separation is delegated to the external ``spleeter`` command-line tool
    using its ``spleeter:2stems`` model.
    """

    # WAV sample width in bytes -> NumPy dtype. WAV PCM is little-endian;
    # 8-bit samples are unsigned, wider ones are signed.
    _PCM_DTYPES = {1: np.uint8, 2: "<i2", 4: "<i4"}

    def load(self, audio_file):
        """Read a PCM WAV file into a NumPy array.

        Args:
            audio_file: Path to a WAV file, or a binary file-like object,
                as accepted by ``wave.open``.

        Returns:
            A ``(sampling_rate, audio_array)`` tuple, where ``audio_array``
            has shape ``(num_frames, num_channels)``.

        Raises:
            ValueError: If the file's sample width is not 1, 2 or 4 bytes.
            wave.Error: If ``wave.open`` cannot parse the file.
        """
        # Read the header fields and the frames from the same reader. The raw
        # frames carry no RIFF header, so they cannot be re-parsed as a WAV.
        with wave.open(audio_file, 'rb') as wav_file:
            sampling_rate = wav_file.getframerate()
            num_channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            audio_data = wav_file.readframes(wav_file.getnframes())

        try:
            dtype = self._PCM_DTYPES[sample_width]
        except KeyError:
            raise ValueError(
                f"Unsupported WAV sample width: {sample_width} bytes."
            ) from None

        audio_array = np.frombuffer(audio_data, dtype=dtype)
        # Samples are interleaved per frame; give each channel its own column.
        audio_array = np.reshape(audio_array, (-1, num_channels))
        return sampling_rate, audio_array

    def separate(self, srcaudio=None):
        """Split audio into vocal and accompaniment stems with Spleeter.

        The audio is written to a WAV file inside a private temporary
        directory, ``spleeter separate`` is run on it, and the two resulting
        stem files are read back. The temporary directory is removed
        afterwards, whether or not separation succeeded.

        Args:
            srcaudio: A ``(sampling_rate, audio_array)`` tuple such as the
                one returned by ``load``. ``audio_array`` must have an
                integer dtype.

        Returns:
            ``[(vocal_sampling_rate, vocal_audio),
            (accompaniment_sampling_rate, accompaniment_audio)]``.

        Raises:
            TypeError: If ``srcaudio`` is ``None``.
            ValueError: If ``audio_array`` does not have an integer dtype.
            subprocess.CalledProcessError: If ``spleeter`` exits with a
                non-zero status.
        """
        if srcaudio is None:
            raise TypeError(
                "srcaudio must be a (sampling_rate, audio_array) tuple, not None."
            )
        sampling_rate, audio = srcaudio

        # Make sure the NumPy array has an integer data type
        if not np.issubdtype(audio.dtype, np.integer):
            raise ValueError("The input NumPy array must have an integer data type.")

        # A per-call temporary directory keeps concurrent calls apart and
        # guarantees cleanup even when Spleeter or the reads below fail.
        with tempfile.TemporaryDirectory(prefix="splt_") as work_dir:
            input_basename = "input"
            input_file = os.path.join(work_dir, f"{input_basename}.wav")
            output_dir = os.path.join(work_dir, "output")
            wavfile.write(input_file, sampling_rate, audio)

            # check=True surfaces a Spleeter failure here instead of as a
            # missing-file error when the stems are read.
            subprocess.run(
                [
                    'spleeter', 'separate',
                    '-p', 'spleeter:2stems',
                    '-o', output_dir,
                    input_file,
                ],
                check=True,
            )

            # Spleeter is expected to write the stems to
            # <output_dir>/<input basename>/<stem>.wav.
            stem_dir = os.path.join(output_dir, input_basename)
            vocal_sampling_rate, vocal_audio = wavfile.read(
                os.path.join(stem_dir, "vocals.wav")
            )
            accompaniment_sampling_rate, accompaniment_audio = wavfile.read(
                os.path.join(stem_dir, "accompaniment.wav")
            )

        return [
            (vocal_sampling_rate, vocal_audio),
            (accompaniment_sampling_rate, accompaniment_audio),
        ]


if __name__ == "__main__":
    # Command-line entry point: separate the WAV file named by the single
    # argument and report the shape and sampling rate of each stem.
    # Local import: only this entry point needs sys.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} INPUT.wav")

    v = VEX()
    srcaudio = v.load(sys.argv[1])
    (vocal_rate, vocals), (accompaniment_rate, accompaniment) = v.separate(srcaudio)
    print(f"vocals: {vocals.shape} @ {vocal_rate} Hz")
    print(f"accompaniment: {accompaniment.shape} @ {accompaniment_rate} Hz")