|
import os |
|
import sys |
|
import json |
|
import argparse |
|
import subprocess |
|
from functools import lru_cache |
|
from distutils.util import strtobool |
|
|
|
# Make the directory the process was launched from importable, so that
# project packages can be imported when this script is run from the
# project root.
now_dir = os.getcwd()
sys.path.append(now_dir)

# Directory containing this file (symlinks resolved) and the "logs" folder
# located next to it.
current_script_directory = os.path.dirname(
    os.path.realpath(__file__),
)
logs_path = os.path.join(current_script_directory, "logs")
|
|
|
from rvc.lib.tools.prerequisites_download import prequisites_download_pipeline |
|
|
|
# Path of the interpreter running this module; used when launching helper
# scripts as subprocesses so they run under the same Python.
python = sys.executable
|
|
|
|
|
|
|
@lru_cache(maxsize=1)
def load_voices_data():
    """Load and return the parsed contents of the TTS voices JSON file.

    The file is read from ``rvc/lib/tools/tts_voices.json`` relative to the
    current working directory. The result is memoized, so the file is read
    only on the first call.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    voices_file = os.path.join("rvc", "lib", "tools", "tts_voices.json")
    with open(voices_file, "r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed
|
|
|
|
|
# Parsed TTS voice entries, loaded once at import time.
voices_data = load_voices_data()

# Unique "ShortName" values of all voices. dict.fromkeys de-duplicates while
# keeping first-seen order, so the list is identical on every run; going
# through a set would leave the order arbitrary and, with string hash
# randomization, different from one process to the next.
locales = list(dict.fromkeys(voice["ShortName"] for voice in voices_data))
|
|
|
|
|
@lru_cache(maxsize=None)
def import_voice_converter():
    """Create the shared ``VoiceConverter`` instance on first use.

    The import is done inside the function so ``rvc.infer.infer`` is only
    loaded when a converter is actually requested. Because the function takes
    no arguments and is memoized, every call returns the same instance.

    Returns:
        A ``VoiceConverter`` instance.
    """
    from rvc.infer.infer import VoiceConverter

    converter = VoiceConverter()
    return converter
|
|
|
|
|
@lru_cache(maxsize=1)
def get_config():
    """Create the shared ``Config`` instance on first use.

    The import is done inside the function so ``rvc.configs.config`` is only
    loaded when the configuration is actually requested. The result is
    memoized, so every call returns the same instance.

    Returns:
        A ``Config`` instance.
    """
    from rvc.configs.config import Config

    config = Config()
    return config
|
|
|
|
|
|
|
def run_infer_script(
    pitch: int,
    filter_radius: int,
    index_rate: float,
    volume_envelope: int,
    protect: float,
    hop_length: int,
    f0_method: str,
    input_path: str,
    output_path: str,
    pth_path: str,
    index_path: str,
    split_audio: bool,
    f0_autotune: bool,
    f0_autotune_strength: float,
    clean_audio: bool,
    clean_strength: float,
    export_format: str,
    f0_file: str,
    embedder_model: str,
    embedder_model_custom: str = None,
    formant_shifting: bool = False,
    formant_qfrency: float = 1.0,
    formant_timbre: float = 1.0,
    post_process: bool = False,
    reverb: bool = False,
    pitch_shift: bool = False,
    limiter: bool = False,
    gain: bool = False,
    distortion: bool = False,
    chorus: bool = False,
    bitcrush: bool = False,
    clipping: bool = False,
    compressor: bool = False,
    delay: bool = False,
    reverb_room_size: float = 0.5,
    reverb_damping: float = 0.5,
    reverb_wet_gain: float = 0.5,
    reverb_dry_gain: float = 0.5,
    reverb_width: float = 0.5,
    reverb_freeze_mode: float = 0.5,
    pitch_shift_semitones: float = 0.0,
    limiter_threshold: float = -6,
    limiter_release_time: float = 0.01,
    gain_db: float = 0.0,
    distortion_gain: float = 25,
    chorus_rate: float = 1.0,
    chorus_depth: float = 0.25,
    chorus_center_delay: float = 7,
    chorus_feedback: float = 0.0,
    chorus_mix: float = 0.5,
    bitcrush_bit_depth: int = 8,
    clipping_threshold: float = -6,
    compressor_threshold: float = 0,
    compressor_ratio: float = 1,
    compressor_attack: float = 1.0,
    compressor_release: float = 100,
    delay_seconds: float = 0.5,
    delay_feedback: float = 0.0,
    delay_mix: float = 0.5,
    sid: int = 0,
):
    """Convert a single audio file with the shared voice converter.

    All arguments are collected into one keyword dictionary and forwarded to
    ``convert_audio`` on the object returned by ``import_voice_converter()``.
    Most arguments are forwarded under their own name; the exceptions are:

    * ``input_path``           -> ``audio_input_path``
    * ``output_path``          -> ``audio_output_path``
    * ``pth_path``             -> ``model_path`` and ``pth_path``
    * ``reverb_wet_gain``      -> ``reverb_wet_level``
    * ``reverb_dry_gain``      -> ``reverb_dry_level``
    * ``limiter_release_time`` -> ``limiter_release``
    * ``chorus_center_delay``  -> ``chorus_delay``

    Returns:
        None. The result of ``convert_audio`` is not propagated.
    """
    kwargs = {
        "audio_input_path": input_path,
        "audio_output_path": output_path,
        "model_path": pth_path,
        "index_path": index_path,
        "pitch": pitch,
        "filter_radius": filter_radius,
        "index_rate": index_rate,
        "volume_envelope": volume_envelope,
        "protect": protect,
        "hop_length": hop_length,
        "f0_method": f0_method,
        # The model path is also sent under "pth_path".
        # NOTE(review): presumably kept for the callee's benefit - confirm
        # against convert_audio before dropping it.
        "pth_path": pth_path,
        "split_audio": split_audio,
        "f0_autotune": f0_autotune,
        "f0_autotune_strength": f0_autotune_strength,
        "clean_audio": clean_audio,
        "clean_strength": clean_strength,
        "export_format": export_format,
        "f0_file": f0_file,
        "embedder_model": embedder_model,
        "embedder_model_custom": embedder_model_custom,
        "post_process": post_process,
        "formant_shifting": formant_shifting,
        "formant_qfrency": formant_qfrency,
        "formant_timbre": formant_timbre,
        "reverb": reverb,
        "pitch_shift": pitch_shift,
        "limiter": limiter,
        "gain": gain,
        "distortion": distortion,
        "chorus": chorus,
        "bitcrush": bitcrush,
        "clipping": clipping,
        "compressor": compressor,
        "delay": delay,
        "reverb_room_size": reverb_room_size,
        "reverb_damping": reverb_damping,
        "reverb_wet_level": reverb_wet_gain,
        "reverb_dry_level": reverb_dry_gain,
        "reverb_width": reverb_width,
        "reverb_freeze_mode": reverb_freeze_mode,
        "pitch_shift_semitones": pitch_shift_semitones,
        "limiter_threshold": limiter_threshold,
        "limiter_release": limiter_release_time,
        "gain_db": gain_db,
        "distortion_gain": distortion_gain,
        "chorus_rate": chorus_rate,
        "chorus_depth": chorus_depth,
        "chorus_delay": chorus_center_delay,
        "chorus_feedback": chorus_feedback,
        "chorus_mix": chorus_mix,
        "bitcrush_bit_depth": bitcrush_bit_depth,
        "clipping_threshold": clipping_threshold,
        "compressor_threshold": compressor_threshold,
        "compressor_ratio": compressor_ratio,
        "compressor_attack": compressor_attack,
        "compressor_release": compressor_release,
        "delay_seconds": delay_seconds,
        "delay_feedback": delay_feedback,
        "delay_mix": delay_mix,
        "sid": sid,
    }
    infer_pipeline = import_voice_converter()
    infer_pipeline.convert_audio(**kwargs)
|
|
|
|
|
|
|
|
|
def run_batch_infer_script(
    pitch: int,
    filter_radius: int,
    index_rate: float,
    volume_envelope: int,
    protect: float,
    hop_length: int,
    f0_method: str,
    input_folder: str,
    output_folder: str,
    pth_path: str,
    index_path: str,
    split_audio: bool,
    f0_autotune: bool,
    f0_autotune_strength: float,
    clean_audio: bool,
    clean_strength: float,
    export_format: str,
    f0_file: str,
    embedder_model: str,
    embedder_model_custom: str = None,
    formant_shifting: bool = False,
    formant_qfrency: float = 1.0,
    formant_timbre: float = 1.0,
    post_process: bool = False,
    reverb: bool = False,
    pitch_shift: bool = False,
    limiter: bool = False,
    gain: bool = False,
    distortion: bool = False,
    chorus: bool = False,
    bitcrush: bool = False,
    clipping: bool = False,
    compressor: bool = False,
    delay: bool = False,
    reverb_room_size: float = 0.5,
    reverb_damping: float = 0.5,
    reverb_wet_gain: float = 0.5,
    reverb_dry_gain: float = 0.5,
    reverb_width: float = 0.5,
    reverb_freeze_mode: float = 0.5,
    pitch_shift_semitones: float = 0.0,
    limiter_threshold: float = -6,
    limiter_release_time: float = 0.01,
    gain_db: float = 0.0,
    distortion_gain: float = 25,
    chorus_rate: float = 1.0,
    chorus_depth: float = 0.25,
    chorus_center_delay: float = 7,
    chorus_feedback: float = 0.0,
    chorus_mix: float = 0.5,
    bitcrush_bit_depth: int = 8,
    clipping_threshold: float = -6,
    compressor_threshold: float = 0,
    compressor_ratio: float = 1,
    compressor_attack: float = 1.0,
    compressor_release: float = 100,
    delay_seconds: float = 0.5,
    delay_feedback: float = 0.0,
    delay_mix: float = 0.5,
    sid: int = 0,
):
    """Convert a folder of audio files with the shared voice converter.

    All arguments are collected into one keyword dictionary and forwarded to
    ``convert_audio_batch`` on the object returned by
    ``import_voice_converter()``. Most arguments are forwarded under their own
    name; the exceptions are:

    * ``input_folder``         -> ``audio_input_paths``
    * ``output_folder``        -> ``audio_output_path``
    * ``pth_path``             -> ``model_path`` and ``pth_path``
    * ``reverb_wet_gain``      -> ``reverb_wet_level``
    * ``reverb_dry_gain``      -> ``reverb_dry_level``
    * ``limiter_release_time`` -> ``limiter_release``
    * ``chorus_center_delay``  -> ``chorus_delay``

    Returns:
        A status message naming ``input_folder``. The result of
        ``convert_audio_batch`` is not propagated.
    """
    kwargs = {
        "audio_input_paths": input_folder,
        "audio_output_path": output_folder,
        "model_path": pth_path,
        "index_path": index_path,
        "pitch": pitch,
        "filter_radius": filter_radius,
        "index_rate": index_rate,
        "volume_envelope": volume_envelope,
        "protect": protect,
        "hop_length": hop_length,
        "f0_method": f0_method,
        # The model path is also sent under "pth_path".
        # NOTE(review): presumably kept for the callee's benefit - confirm
        # against convert_audio_batch before dropping it.
        "pth_path": pth_path,
        "split_audio": split_audio,
        "f0_autotune": f0_autotune,
        "f0_autotune_strength": f0_autotune_strength,
        "clean_audio": clean_audio,
        "clean_strength": clean_strength,
        "export_format": export_format,
        "f0_file": f0_file,
        "embedder_model": embedder_model,
        "embedder_model_custom": embedder_model_custom,
        "post_process": post_process,
        "formant_shifting": formant_shifting,
        "formant_qfrency": formant_qfrency,
        "formant_timbre": formant_timbre,
        "reverb": reverb,
        "pitch_shift": pitch_shift,
        "limiter": limiter,
        "gain": gain,
        "distortion": distortion,
        "chorus": chorus,
        "bitcrush": bitcrush,
        "clipping": clipping,
        "compressor": compressor,
        "delay": delay,
        "reverb_room_size": reverb_room_size,
        "reverb_damping": reverb_damping,
        "reverb_wet_level": reverb_wet_gain,
        "reverb_dry_level": reverb_dry_gain,
        "reverb_width": reverb_width,
        "reverb_freeze_mode": reverb_freeze_mode,
        "pitch_shift_semitones": pitch_shift_semitones,
        "limiter_threshold": limiter_threshold,
        "limiter_release": limiter_release_time,
        "gain_db": gain_db,
        "distortion_gain": distortion_gain,
        "chorus_rate": chorus_rate,
        "chorus_depth": chorus_depth,
        "chorus_delay": chorus_center_delay,
        "chorus_feedback": chorus_feedback,
        "chorus_mix": chorus_mix,
        "bitcrush_bit_depth": bitcrush_bit_depth,
        "clipping_threshold": clipping_threshold,
        "compressor_threshold": compressor_threshold,
        "compressor_ratio": compressor_ratio,
        "compressor_attack": compressor_attack,
        "compressor_release": compressor_release,
        "delay_seconds": delay_seconds,
        "delay_feedback": delay_feedback,
        "delay_mix": delay_mix,
        "sid": sid,
    }
    infer_pipeline = import_voice_converter()
    infer_pipeline.convert_audio_batch(**kwargs)

    return f"Files from {input_folder} inferred successfully."
|
|
|
|
|
|
|
def run_tts_script(
    tts_file: str,
    tts_text: str,
    tts_voice: str,
    tts_rate: int,
    pitch: int,
    filter_radius: int,
    index_rate: float,
    volume_envelope: int,
    protect: float,
    hop_length: int,
    f0_method: str,
    output_tts_path: str,
    output_rvc_path: str,
    pth_path: str,
    index_path: str,
    split_audio: bool,
    f0_autotune: bool,
    f0_autotune_strength: float,
    clean_audio: bool,
    clean_strength: float,
    export_format: str,
    f0_file: str,
    embedder_model: str,
    embedder_model_custom: str = None,
    sid: int = 0,
):
    """Run the TTS helper script, then voice-convert the file it writes.

    Steps, in order:

    1. Delete ``output_tts_path`` if it already exists.
    2. Run ``rvc/lib/tools/tts.py`` (path relative to the current working
       directory) with the current interpreter, passing ``tts_file``,
       ``tts_text``, ``tts_voice``, ``tts_rate`` and ``output_tts_path`` as
       string command-line arguments.
    3. Call ``convert_audio`` on the shared voice converter with
       ``output_tts_path`` as input and ``output_rvc_path`` as output.

    The formant and post-processing options of ``convert_audio`` are all
    passed as ``None``.

    Returns:
        None.
    """
    tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py")

    # Remove any earlier synthesis result at the target path before the
    # helper script runs.
    if os.path.exists(output_tts_path):
        os.remove(output_tts_path)

    # Every argument is stringified because tts_rate is an int.
    tts_arguments = (
        python,
        tts_script_path,
        tts_file,
        tts_text,
        tts_voice,
        tts_rate,
        output_tts_path,
    )
    command_tts = [str(argument) for argument in tts_arguments]
    # NOTE(review): the exit status of the helper is not checked, so the
    # conversion below is attempted even if synthesis failed.
    subprocess.run(command_tts)

    # Options of convert_audio that this entry point does not expose; each is
    # explicitly forwarded as None.
    unset_options = dict.fromkeys(
        (
            "formant_shifting",
            "formant_qfrency",
            "formant_timbre",
            "post_process",
            "reverb",
            "pitch_shift",
            "limiter",
            "gain",
            "distortion",
            "chorus",
            "bitcrush",
            "clipping",
            "compressor",
            "delay",
            "sliders",
        ),
        None,
    )

    converter = import_voice_converter()
    converter.convert_audio(
        pitch=pitch,
        filter_radius=filter_radius,
        index_rate=index_rate,
        volume_envelope=volume_envelope,
        protect=protect,
        hop_length=hop_length,
        f0_method=f0_method,
        audio_input_path=output_tts_path,
        audio_output_path=output_rvc_path,
        model_path=pth_path,
        index_path=index_path,
        split_audio=split_audio,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        clean_audio=clean_audio,
        clean_strength=clean_strength,
        export_format=export_format,
        f0_file=f0_file,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_model_custom,
        sid=sid,
        **unset_options,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_prerequisites_script(
    models: bool,
    exe: bool,
):
    """Run the prerequisites download pipeline and report completion.

    Args:
        models: Forwarded as the first positional argument of
            ``prequisites_download_pipeline``.
        exe: Forwarded as the second positional argument of
            ``prequisites_download_pipeline``.

    Returns:
        A fixed success message once the pipeline call has returned.
    """
    prequisites_download_pipeline(models, exe)
    status_message = "Prerequisites installed successfully."
    return status_message
|
|
|
|
|
|