File size: 3,457 Bytes
2b7fd6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import io
import os
import subprocess
import tempfile
import wave
import librosa
import numpy as np
import soundfile
from scipy.io import wavfile
from scipy.io.wavfile import read
class VEX(object):
    """Split audio into vocal and accompaniment stems.

    WAV input is decoded with the standard-library ``wave`` module, and the
    separation itself is delegated to the external ``spleeter`` command-line
    tool using its ``spleeter:2stems`` configuration.
    """

    def __init__(self):
        pass

    def load(self, audio_file):
        """Read a 16-bit PCM WAV file into a NumPy array.

        Args:
            audio_file: Path or binary file object accepted by ``wave.open``.

        Returns:
            A ``(sampling_rate, audio_array)`` tuple, where ``audio_array`` is
            an int16 array of shape ``(num_frames, num_channels)``.

        Raises:
            ValueError: If the file does not use 2-byte (16-bit) samples.
            wave.Error: If ``wave.open`` cannot parse the file.
        """
        with wave.open(audio_file, 'rb') as wav_file:
            sample_width = wav_file.getsampwidth()
            if sample_width != 2:
                # The frames are decoded as int16 below; any other width
                # would be silently misinterpreted.
                raise ValueError(
                    "Only 16-bit PCM WAV files are supported "
                    f"(got {8 * sample_width}-bit samples)."
                )
            sampling_rate = wav_file.getframerate()
            num_channels = wav_file.getnchannels()
            audio_data = wav_file.readframes(wav_file.getnframes())

        # WAV sample data is little-endian; frames are interleaved by channel,
        # so one row per frame and one column per channel.
        audio_array = np.frombuffer(audio_data, dtype='<i2')
        audio_array = audio_array.astype(np.int16, copy=False)
        audio_array = np.reshape(audio_array, (-1, num_channels))
        return sampling_rate, audio_array

    def separate(self, srcaudio=None):
        """Separate audio into vocals and accompaniment using spleeter.

        The input is written to a WAV file inside a private temporary
        directory, ``spleeter separate -p spleeter:2stems`` is run on it, and
        the two resulting stem files are read back. The temporary directory is
        removed afterwards, including when an error occurs.

        Args:
            srcaudio: ``(sampling_rate, audio)`` tuple, where ``audio`` is an
                integer-typed NumPy array (for example the value returned by
                ``load``).

        Returns:
            ``[(vocal_sampling_rate, vocal_audio),
            (accompaniment_sampling_rate, accompaniment_audio)]`` as read by
            ``scipy.io.wavfile.read`` from spleeter's output files.

        Raises:
            TypeError: If ``srcaudio`` is ``None``.
            ValueError: If ``audio`` does not have an integer data type.
            subprocess.CalledProcessError: If spleeter exits with a non-zero
                status.
            FileNotFoundError: If the ``spleeter`` executable is not found.
        """
        if srcaudio is None:
            raise TypeError(
                "srcaudio must be a (sampling_rate, audio) tuple, not None"
            )
        sampling_rate, audio = srcaudio
        audio = np.asarray(audio)

        # Make sure the NumPy array has an integer data type
        if not np.issubdtype(audio.dtype, np.integer):
            raise ValueError("The input NumPy array must have an integer data type.")

        input_basename = "input"
        with tempfile.TemporaryDirectory(prefix="splt_") as work_dir:
            input_path = os.path.join(work_dir, f"{input_basename}.wav")
            wavfile.write(input_path, sampling_rate, audio)

            # check=True turns a spleeter failure into an immediate, explicit
            # error instead of a missing-file error further down.
            subprocess.run(
                [
                    'spleeter', 'separate',
                    '-p', 'spleeter:2stems',
                    '-o', work_dir,
                    input_path,
                ],
                check=True,
            )

            # spleeter writes its stems to <output dir>/<input basename>/.
            stem_dir = os.path.join(work_dir, input_basename)
            vocal_sampling_rate, vocal_audio = wavfile.read(
                os.path.join(stem_dir, "vocals.wav")
            )
            accompaniment_sampling_rate, accompaniment_audio = wavfile.read(
                os.path.join(stem_dir, "accompaniment.wav")
            )

        return [
            (vocal_sampling_rate, vocal_audio),
            (accompaniment_sampling_rate, accompaniment_audio),
        ]
if __name__ == "__main__":
    import sys

    # Separation needs real audio: calling separate() without input can only
    # fail, so the WAV file to process is taken from the command line.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <input.wav>")

    v = VEX()
    srcaudio = v.load(sys.argv[1])
    (vocal_rate, vocal_audio), (accompaniment_rate, accompaniment_audio) = v.separate(srcaudio)
    print(f"vocals: {vocal_rate} Hz, shape {vocal_audio.shape}")
    print(f"accompaniment: {accompaniment_rate} Hz, shape {accompaniment_audio.shape}")
|