Spaces:

NeoPy
/

neorvc

Running

App Files Files Community

neorvc / rvc /rvc_cli.py

NeoPy

Upload folder using huggingface_hub

1c7d911 verified 5 days ago

raw

history blame contribute delete

12.3 kB

	import os
	import sys
	import json
	import argparse
	import subprocess
	from functools import lru_cache
	from distutils.util import strtobool

	# Directory the process was launched from; it is appended to the import
	# search path.
	# NOTE(review): presumably this is what lets the `rvc.*` import below
	# resolve when the script is started from the repository root - confirm.
	now_dir = os.getcwd()
	sys.path.append(now_dir)

	# Directory that contains this file (symlinks resolved by realpath) and the
	# "logs" folder located directly inside it.
	current_script_directory = os.path.dirname(os.path.realpath(__file__))
	logs_path = os.path.join(current_script_directory, "logs")

	# Project-local import; kept after the sys.path change above.
	from rvc.lib.tools.prerequisites_download import prequisites_download_pipeline

	# Path of the running interpreter; run_tts_script uses it to start the TTS
	# helper script as a child process.
	python = sys.executable


	# Get TTS Voices -> https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4
	@lru_cache(maxsize=1)  # The file is static, so a single cached result is enough
	def load_voices_data():
	    """Read and parse the TTS voice list shipped with the project.

	    The file is looked up at ``rvc/lib/tools/tts_voices.json`` relative to
	    the current working directory and decoded as UTF-8 JSON.

	    Returns:
	        The object produced by ``json.load`` for that file. Because of the
	        ``lru_cache`` decorator, repeated calls return the same object
	        without reading the file again.
	    """
	    voices_file = os.path.join("rvc", "lib", "tools", "tts_voices.json")
	    with open(voices_file, mode="r", encoding="utf-8") as handle:
	        parsed = json.load(handle)
	    return parsed


	# Parsed voice list, loaded once when this module is imported.
	voices_data = load_voices_data()
	# Distinct "ShortName" values of all voices. They pass through a set to drop
	# duplicates, so the order of the resulting list is whatever the set yields.
	locales = list(set(entry["ShortName"] for entry in voices_data))


	@lru_cache(maxsize=None)
	def import_voice_converter():
	    """Create the ``VoiceConverter`` used by the inference entry points.

	    ``rvc.infer.infer`` is imported inside the function body, so that import
	    runs only when this function is first called. The ``lru_cache``
	    decorator stores the created instance, so later calls return the same
	    object instead of constructing a new one.

	    Returns:
	        The cached ``VoiceConverter`` instance.
	    """
	    from rvc.infer.infer import VoiceConverter as _VoiceConverter

	    converter = _VoiceConverter()
	    return converter


	@lru_cache(maxsize=1)
	def get_config():
	    """Create the project ``Config`` object on first use and cache it.

	    ``rvc.configs.config`` is imported inside the function body, so that
	    import runs only when this function is first called. The ``lru_cache``
	    decorator makes later calls return the same ``Config`` instance.

	    Returns:
	        The cached ``Config`` instance.
	    """
	    from rvc.configs.config import Config as _Config

	    config = _Config()
	    return config


	# Infer
	def run_infer_script(
	    pitch: int,
	    filter_radius: int,
	    index_rate: float,
	    volume_envelope: int,
	    protect: float,
	    hop_length: int,
	    f0_method: str,
	    input_path: str,
	    output_path: str,
	    pth_path: str,
	    index_path: str,
	    split_audio: bool,
	    f0_autotune: bool,
	    f0_autotune_strength: float,
	    clean_audio: bool,
	    clean_strength: float,
	    export_format: str,
	    f0_file: str,
	    embedder_model: str,
	    embedder_model_custom: str = None,
	    formant_shifting: bool = False,
	    formant_qfrency: float = 1.0,
	    formant_timbre: float = 1.0,
	    post_process: bool = False,
	    reverb: bool = False,
	    pitch_shift: bool = False,
	    limiter: bool = False,
	    gain: bool = False,
	    distortion: bool = False,
	    chorus: bool = False,
	    bitcrush: bool = False,
	    clipping: bool = False,
	    compressor: bool = False,
	    delay: bool = False,
	    reverb_room_size: float = 0.5,
	    reverb_damping: float = 0.5,
	    reverb_wet_gain: float = 0.5,
	    reverb_dry_gain: float = 0.5,
	    reverb_width: float = 0.5,
	    reverb_freeze_mode: float = 0.5,
	    pitch_shift_semitones: float = 0.0,
	    limiter_threshold: float = -6,
	    limiter_release_time: float = 0.01,
	    gain_db: float = 0.0,
	    distortion_gain: float = 25,
	    chorus_rate: float = 1.0,
	    chorus_depth: float = 0.25,
	    chorus_center_delay: float = 7,
	    chorus_feedback: float = 0.0,
	    chorus_mix: float = 0.5,
	    bitcrush_bit_depth: int = 8,
	    clipping_threshold: float = -6,
	    compressor_threshold: float = 0,
	    compressor_ratio: float = 1,
	    compressor_attack: float = 1.0,
	    compressor_release: float = 100,
	    delay_seconds: float = 0.5,
	    delay_feedback: float = 0.0,
	    delay_mix: float = 0.5,
	    sid: int = 0,
	):
	    """Convert a single audio file with the cached ``VoiceConverter``.

	    Every argument is forwarded as a keyword argument to
	    ``VoiceConverter.convert_audio``. Most keep their name; the following
	    are renamed on the way:

	    * ``input_path``           -> ``audio_input_path``
	    * ``output_path``          -> ``audio_output_path``
	    * ``pth_path``             -> ``model_path`` (and also ``pth_path``)
	    * ``reverb_wet_gain``      -> ``reverb_wet_level``
	    * ``reverb_dry_gain``      -> ``reverb_dry_level``
	    * ``limiter_release_time`` -> ``limiter_release``
	    * ``chorus_center_delay``  -> ``chorus_delay``

	    Returns:
	        None. The return value of ``convert_audio`` is discarded.
	    """
	    convert_kwargs = {
	        # Input/output locations and model files.
	        "audio_input_path": input_path,
	        "audio_output_path": output_path,
	        "model_path": pth_path,
	        "index_path": index_path,
	        # Core conversion settings.
	        "pitch": pitch,
	        "filter_radius": filter_radius,
	        "index_rate": index_rate,
	        "volume_envelope": volume_envelope,
	        "protect": protect,
	        "hop_length": hop_length,
	        "f0_method": f0_method,
	        # The model path is deliberately still sent under "pth_path" as well
	        # as "model_path"; which of the two the callee reads is not visible
	        # from this file, so both are kept.
	        "pth_path": pth_path,
	        "split_audio": split_audio,
	        "f0_autotune": f0_autotune,
	        "f0_autotune_strength": f0_autotune_strength,
	        "clean_audio": clean_audio,
	        "clean_strength": clean_strength,
	        "export_format": export_format,
	        "f0_file": f0_file,
	        "embedder_model": embedder_model,
	        "embedder_model_custom": embedder_model_custom,
	        # Post-processing and formant switches.
	        "post_process": post_process,
	        "formant_shifting": formant_shifting,
	        "formant_qfrency": formant_qfrency,
	        "formant_timbre": formant_timbre,
	        "reverb": reverb,
	        "pitch_shift": pitch_shift,
	        "limiter": limiter,
	        "gain": gain,
	        "distortion": distortion,
	        "chorus": chorus,
	        "bitcrush": bitcrush,
	        "clipping": clipping,
	        "compressor": compressor,
	        "delay": delay,
	        # Per-effect settings (some keys differ from the parameter names).
	        "reverb_room_size": reverb_room_size,
	        "reverb_damping": reverb_damping,
	        "reverb_wet_level": reverb_wet_gain,
	        "reverb_dry_level": reverb_dry_gain,
	        "reverb_width": reverb_width,
	        "reverb_freeze_mode": reverb_freeze_mode,
	        "pitch_shift_semitones": pitch_shift_semitones,
	        "limiter_threshold": limiter_threshold,
	        "limiter_release": limiter_release_time,
	        "gain_db": gain_db,
	        "distortion_gain": distortion_gain,
	        "chorus_rate": chorus_rate,
	        "chorus_depth": chorus_depth,
	        "chorus_delay": chorus_center_delay,
	        "chorus_feedback": chorus_feedback,
	        "chorus_mix": chorus_mix,
	        "bitcrush_bit_depth": bitcrush_bit_depth,
	        "clipping_threshold": clipping_threshold,
	        "compressor_threshold": compressor_threshold,
	        "compressor_ratio": compressor_ratio,
	        "compressor_attack": compressor_attack,
	        "compressor_release": compressor_release,
	        "delay_seconds": delay_seconds,
	        "delay_feedback": delay_feedback,
	        "delay_mix": delay_mix,
	        "sid": sid,
	    }
	    converter = import_voice_converter()
	    converter.convert_audio(**convert_kwargs)



	# Batch infer
	def run_batch_infer_script(
	    pitch: int,
	    filter_radius: int,
	    index_rate: float,
	    volume_envelope: int,
	    protect: float,
	    hop_length: int,
	    f0_method: str,
	    input_folder: str,
	    output_folder: str,
	    pth_path: str,
	    index_path: str,
	    split_audio: bool,
	    f0_autotune: bool,
	    f0_autotune_strength: float,
	    clean_audio: bool,
	    clean_strength: float,
	    export_format: str,
	    f0_file: str,
	    embedder_model: str,
	    embedder_model_custom: str = None,
	    formant_shifting: bool = False,
	    formant_qfrency: float = 1.0,
	    formant_timbre: float = 1.0,
	    post_process: bool = False,
	    reverb: bool = False,
	    pitch_shift: bool = False,
	    limiter: bool = False,
	    gain: bool = False,
	    distortion: bool = False,
	    chorus: bool = False,
	    bitcrush: bool = False,
	    clipping: bool = False,
	    compressor: bool = False,
	    delay: bool = False,
	    reverb_room_size: float = 0.5,
	    reverb_damping: float = 0.5,
	    reverb_wet_gain: float = 0.5,
	    reverb_dry_gain: float = 0.5,
	    reverb_width: float = 0.5,
	    reverb_freeze_mode: float = 0.5,
	    pitch_shift_semitones: float = 0.0,
	    limiter_threshold: float = -6,
	    limiter_release_time: float = 0.01,
	    gain_db: float = 0.0,
	    distortion_gain: float = 25,
	    chorus_rate: float = 1.0,
	    chorus_depth: float = 0.25,
	    chorus_center_delay: float = 7,
	    chorus_feedback: float = 0.0,
	    chorus_mix: float = 0.5,
	    bitcrush_bit_depth: int = 8,
	    clipping_threshold: float = -6,
	    compressor_threshold: float = 0,
	    compressor_ratio: float = 1,
	    compressor_attack: float = 1.0,
	    compressor_release: float = 100,
	    delay_seconds: float = 0.5,
	    delay_feedback: float = 0.0,
	    delay_mix: float = 0.5,
	    sid: int = 0,
	):
	    """Convert a folder of audio files with the cached ``VoiceConverter``.

	    Every argument is forwarded as a keyword argument to
	    ``VoiceConverter.convert_audio_batch``. Most keep their name; the
	    following are renamed on the way:

	    * ``input_folder``         -> ``audio_input_paths``
	    * ``output_folder``        -> ``audio_output_path``
	    * ``pth_path``             -> ``model_path`` (and also ``pth_path``)
	    * ``reverb_wet_gain``      -> ``reverb_wet_level``
	    * ``reverb_dry_gain``      -> ``reverb_dry_level``
	    * ``limiter_release_time`` -> ``limiter_release``
	    * ``chorus_center_delay``  -> ``chorus_delay``

	    Returns:
	        A status string naming ``input_folder``. It is returned after
	        ``convert_audio_batch`` returns; the callee's own return value is
	        not inspected.
	    """
	    convert_kwargs = {
	        # Input/output locations and model files.
	        "audio_input_paths": input_folder,
	        "audio_output_path": output_folder,
	        "model_path": pth_path,
	        "index_path": index_path,
	        # Core conversion settings.
	        "pitch": pitch,
	        "filter_radius": filter_radius,
	        "index_rate": index_rate,
	        "volume_envelope": volume_envelope,
	        "protect": protect,
	        "hop_length": hop_length,
	        "f0_method": f0_method,
	        # The model path is deliberately still sent under "pth_path" as well
	        # as "model_path"; which of the two the callee reads is not visible
	        # from this file, so both are kept.
	        "pth_path": pth_path,
	        "split_audio": split_audio,
	        "f0_autotune": f0_autotune,
	        "f0_autotune_strength": f0_autotune_strength,
	        "clean_audio": clean_audio,
	        "clean_strength": clean_strength,
	        "export_format": export_format,
	        "f0_file": f0_file,
	        "embedder_model": embedder_model,
	        "embedder_model_custom": embedder_model_custom,
	        # Post-processing and formant switches.
	        "post_process": post_process,
	        "formant_shifting": formant_shifting,
	        "formant_qfrency": formant_qfrency,
	        "formant_timbre": formant_timbre,
	        "reverb": reverb,
	        "pitch_shift": pitch_shift,
	        "limiter": limiter,
	        "gain": gain,
	        "distortion": distortion,
	        "chorus": chorus,
	        "bitcrush": bitcrush,
	        "clipping": clipping,
	        "compressor": compressor,
	        "delay": delay,
	        # Per-effect settings (some keys differ from the parameter names).
	        "reverb_room_size": reverb_room_size,
	        "reverb_damping": reverb_damping,
	        "reverb_wet_level": reverb_wet_gain,
	        "reverb_dry_level": reverb_dry_gain,
	        "reverb_width": reverb_width,
	        "reverb_freeze_mode": reverb_freeze_mode,
	        "pitch_shift_semitones": pitch_shift_semitones,
	        "limiter_threshold": limiter_threshold,
	        "limiter_release": limiter_release_time,
	        "gain_db": gain_db,
	        "distortion_gain": distortion_gain,
	        "chorus_rate": chorus_rate,
	        "chorus_depth": chorus_depth,
	        "chorus_delay": chorus_center_delay,
	        "chorus_feedback": chorus_feedback,
	        "chorus_mix": chorus_mix,
	        "bitcrush_bit_depth": bitcrush_bit_depth,
	        "clipping_threshold": clipping_threshold,
	        "compressor_threshold": compressor_threshold,
	        "compressor_ratio": compressor_ratio,
	        "compressor_attack": compressor_attack,
	        "compressor_release": compressor_release,
	        "delay_seconds": delay_seconds,
	        "delay_feedback": delay_feedback,
	        "delay_mix": delay_mix,
	        "sid": sid,
	    }
	    converter = import_voice_converter()
	    converter.convert_audio_batch(**convert_kwargs)

	    return f"Files from {input_folder} inferred successfully."


	# TTS
	def run_tts_script(
	    tts_file: str,
	    tts_text: str,
	    tts_voice: str,
	    tts_rate: int,
	    pitch: int,
	    filter_radius: int,
	    index_rate: float,
	    volume_envelope: int,
	    protect: float,
	    hop_length: int,
	    f0_method: str,
	    output_tts_path: str,
	    output_rvc_path: str,
	    pth_path: str,
	    index_path: str,
	    split_audio: bool,
	    f0_autotune: bool,
	    f0_autotune_strength: float,
	    clean_audio: bool,
	    clean_strength: float,
	    export_format: str,
	    f0_file: str,
	    embedder_model: str,
	    embedder_model_custom: str = None,
	    sid: int = 0,
	):
	    """Run the TTS helper script, then convert its output with the voice model.

	    Steps, in order:

	    1. Delete ``output_tts_path`` if it already exists.
	    2. Run ``rvc/lib/tools/tts.py`` with the current interpreter, passing
	       ``tts_file``, ``tts_text``, ``tts_voice``, ``tts_rate`` and
	       ``output_tts_path`` as string arguments.
	    3. Call ``VoiceConverter.convert_audio`` with ``output_tts_path`` as the
	       input and ``output_rvc_path`` as the output. The formant,
	       post-processing and effect arguments, plus ``sliders``, are all
	       passed as ``None``.

	    Returns:
	        None.
	    """
	    tts_helper = os.path.join("rvc", "lib", "tools", "tts.py")

	    # Remove a file left at the TTS output location before running the helper.
	    if os.path.exists(output_tts_path):
	        os.remove(output_tts_path)

	    # Every argument is stringified because tts_rate is annotated as an int.
	    tts_argv = [
	        str(part)
	        for part in (
	            python,
	            tts_helper,
	            tts_file,
	            tts_text,
	            tts_voice,
	            tts_rate,
	            output_tts_path,
	        )
	    ]
	    # NOTE(review): the helper's exit status is not checked, so the
	    # conversion below runs even if the helper failed.
	    subprocess.run(tts_argv)

	    # Options this entry point does not expose; each is sent as None.
	    unset_options = dict.fromkeys(
	        (
	            "formant_shifting",
	            "formant_qfrency",
	            "formant_timbre",
	            "post_process",
	            "reverb",
	            "pitch_shift",
	            "limiter",
	            "gain",
	            "distortion",
	            "chorus",
	            "bitcrush",
	            "clipping",
	            "compressor",
	            "delay",
	            "sliders",
	        ),
	        None,
	    )

	    converter = import_voice_converter()
	    converter.convert_audio(
	        pitch=pitch,
	        filter_radius=filter_radius,
	        index_rate=index_rate,
	        volume_envelope=volume_envelope,
	        protect=protect,
	        hop_length=hop_length,
	        f0_method=f0_method,
	        audio_input_path=output_tts_path,
	        audio_output_path=output_rvc_path,
	        model_path=pth_path,
	        index_path=index_path,
	        split_audio=split_audio,
	        f0_autotune=f0_autotune,
	        f0_autotune_strength=f0_autotune_strength,
	        clean_audio=clean_audio,
	        clean_strength=clean_strength,
	        export_format=export_format,
	        f0_file=f0_file,
	        embedder_model=embedder_model,
	        embedder_model_custom=embedder_model_custom,
	        sid=sid,
	        **unset_options,
	    )






	# Prerequisites
	def run_prerequisites_script(
	    models: bool,
	    exe: bool,
	):
	    """Call the prerequisites download pipeline and report completion.

	    ``models`` and ``exe`` are passed positionally, in that order, to
	    ``prequisites_download_pipeline``.

	    Returns:
	        The fixed status string ``"Prerequisites installed successfully."``,
	        returned once the pipeline call has returned.
	    """
	    prequisites_download_pipeline(models, exe)
	    status_message = "Prerequisites installed successfully."
	    return status_message