import os
import sys
import re
import logging
import warnings
import unicodedata
import subprocess
import tempfile

import librosa
import soundfile as sf
import numpy as np
import wget
from pydub import AudioSegment
from torch import nn
from transformers import HubertModel

warnings.filterwarnings("ignore")

repo = "NeoPy/Resources"

# Silence noisy third-party loggers.
logging.getLogger("fairseq").setLevel(logging.ERROR)
logging.getLogger("faiss.loader").setLevel(logging.ERROR)
logging.getLogger("transformers").setLevel(logging.ERROR)
logging.getLogger("torch").setLevel(logging.ERROR)

now_dir = os.getcwd()
sys.path.append(now_dir)

# Path to the bundled stftpitchshift executable (".exe" suffix on Windows).
base_path = os.path.join(now_dir, "rvc", "models", "formant", "stftpitchshift")
stft = base_path + ".exe" if sys.platform == "win32" else base_path
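

# `final_proj` mirrors the projection head shipped in ContentVec-style RVC embedder
# checkpoints, so `HubertModelWithFinalProj.from_pretrained` can pick up those weights
# directly (a note on intent; the exact checkpoint layout is assumed, not verified here).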
class HubertModelWithFinalProj(HubertModel):
    def __init__(self, config):
        super().__init__(config)
        self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size)


def load_audio(file, sample_rate):
    """Read an audio file, convert it to mono, and resample it to `sample_rate`."""
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        audio, sr = sf.read(file)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)
        if sr != sample_rate:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
    except Exception as error:
        raise RuntimeError(f"An error occurred loading the audio: {error}")

    return audio.flatten()
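
# Usage sketch (hypothetical path):
#   wav = load_audio("assets/audios/example.wav", 16000)  # mono float array at 16 kHz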


def load_audio_infer(
    file,
    sample_rate,
    **kwargs,
):
    """Load audio for inference, optionally applying formant shifting via stftpitchshift."""
    formant_shifting = kwargs.get("formant_shifting", False)
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        if not os.path.isfile(file):
            raise FileNotFoundError(f"File not found: {file}")
        audio, sr = sf.read(file)
        if len(audio.shape) > 1:
            audio = librosa.to_mono(audio.T)
        if sr != sample_rate:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
        if formant_shifting:
            formant_qfrency = kwargs.get("formant_qfrency", 0.8)
            formant_timbre = kwargs.get("formant_timbre", 0.8)

            from stftpitchshift import StftPitchShift

            pitchshifter = StftPitchShift(1024, 32, sample_rate)
            audio = pitchshifter.shiftpitch(
                audio,
                factors=1,
                quefrency=formant_qfrency * 1e-3,
                distortion=formant_timbre,
            )
    except Exception as error:
        raise RuntimeError(f"An error occurred loading the audio: {error}")
    return np.array(audio).flatten()
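
# Usage sketch (hypothetical values):
#   wav = load_audio_infer(
#       "input.wav", 16000, formant_shifting=True, formant_qfrency=1.0, formant_timbre=1.2
#   )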


def format_title(title):
    """Normalize a title to an ASCII-only, underscore-separated string safe for filenames."""
    formatted_title = (
        unicodedata.normalize("NFKD", title).encode("ascii", "ignore").decode("utf-8")
    )
    formatted_title = re.sub(r"[\u2500-\u257F]+", "", formatted_title)
    formatted_title = re.sub(r"[^\w\s.-]", "", formatted_title)
    formatted_title = re.sub(r"\s+", "_", formatted_title)
    return formatted_title
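
# Example: format_title("Mi Canción (live) #1") -> "Mi_Cancion_live_1"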


def load_embedding(embedder_model, custom_embedder=None):
    """Load a HuBERT embedder by name, downloading its weights and config on first use."""
    embedder_root = os.path.join(now_dir, "rvc", "models", "embedders")
    embedding_list = {
        "contentvec": os.path.join(embedder_root, "contentvec"),
        "chinese-hubert-base": os.path.join(embedder_root, "chinese_hubert_base"),
        "japanese-hubert-base": os.path.join(embedder_root, "japanese_hubert_base"),
        "korean-hubert-base": os.path.join(embedder_root, "korean_hubert_base"),
    }

    online_embedders = {
        "contentvec": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/contentvec/pytorch_model.bin",
        "chinese-hubert-base": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/chinese_hubert_base/pytorch_model.bin",
        "japanese-hubert-base": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/japanese_hubert_base/pytorch_model.bin",
        "korean-hubert-base": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/korean_hubert_base/pytorch_model.bin",
    }

    config_files = {
        "contentvec": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/contentvec/config.json",
        "chinese-hubert-base": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/chinese_hubert_base/config.json",
        "japanese-hubert-base": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/japanese_hubert_base/config.json",
        "korean-hubert-base": f"https://huggingface.co/{repo}/resolve/main/Resources/embedders/korean_hubert_base/config.json",
    }

    if embedder_model == "custom":
        if custom_embedder and os.path.exists(custom_embedder):
            model_path = custom_embedder
        else:
            print(f"Custom embedder not found: {custom_embedder}, using contentvec")
            model_path = embedding_list["contentvec"]
    else:
        model_path = embedding_list[embedder_model]
        bin_file = os.path.join(model_path, "pytorch_model.bin")
        json_file = os.path.join(model_path, "config.json")
        os.makedirs(model_path, exist_ok=True)
        if not os.path.exists(bin_file):
            url = online_embedders[embedder_model]
            print(f"Downloading {url} to {model_path}...")
            wget.download(url, out=bin_file)
        if not os.path.exists(json_file):
            url = config_files[embedder_model]
            print(f"Downloading {url} to {model_path}...")
            wget.download(url, out=json_file)

    models = HubertModelWithFinalProj.from_pretrained(model_path)
    return models
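
# Usage sketch: load the default ContentVec embedder (weights and config are downloaded
# on first use if missing).
#   hubert = load_embedding("contentvec")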