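# Voice conversion model config. Each `_target_` names the class or callable to
# instantiate (Hydra-style); sibling keys are passed as its arguments, and
# `_partial_: true` returns a partially applied callable instead of an instance.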
_target_: modules.v2.vc_wrapper.VoiceConversionWrapper
sr: 22050
hop_size: 256
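# Mel-spectrogram frontend (built as a partial): 80-band mels at 22050 Hz,
# 1024-point FFT/window, hop 256, no centering.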
mel_fn:
  _target_: modules.audio.mel_spectrogram
  _partial_: true
  n_fft: 1024
  win_size: 1024
  hop_size: 256
  num_mels: 80
  sampling_rate: 22050
  fmin: 0
  fmax: null
  center: False
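# Conditional flow matching (CFM) decoder. The DiT estimator operates on 80 mel
# channels, conditioned on 512-dim content features and a 192-dim style
# embedding, with time and style injected as tokens.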
cfm:
  _target_: modules.v2.cfm.CFM
  estimator:
    _target_: modules.v2.dit_wrapper.DiT
    time_as_token: true
    style_as_token: true
    uvit_skip_connection: false
    block_size: 8192
    depth: 13
    num_heads: 8
    hidden_dim: 512
    in_channels: 80
    content_dim: 512
    style_encoder_dim: 192
    class_dropout_prob: 0.1
    dropout_rate: 0.0
    attn_dropout_rate: 0.0
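# Length regulator feeding the CFM: interpolates discrete content tokens from a
# 2048-entry codebook into 512-channel features.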
cfm_length_regulator:
  _target_: modules.v2.length_regulator.InterpolateRegulator
  channels: 512
  is_discrete: true
  codebook_size: 2048
  sampling_ratios: [ 1, 1, 1, 1 ]
  f0_condition: false
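# Autoregressive transformer: 12 layers, 12 heads, dim 768, RoPE positions,
# 2049-token vocabulary (2048 codes + 1 EOS).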
ar:
  _target_: modules.v2.ar.NaiveWrapper
  model:
    _target_: modules.v2.ar.NaiveTransformer
    config:
      _target_: modules.v2.ar.NaiveModelArgs
      dropout: 0.0
      rope_base: 10000.0
      dim: 768
      head_dim: 64
      n_local_heads: 2
      intermediate_size: 2304
      n_head: 12
      n_layer: 12
      vocab_size: 2049 # 2048 codes + 1 for eos
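# Length regulator on the AR path: discrete tokens from a 32-entry codebook
# mapped to 768-channel features, no extra sampling ratios.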
ar_length_regulator:
  _target_: modules.v2.length_regulator.InterpolateRegulator
  channels: 768
  is_discrete: true
  codebook_size: 32
  sampling_ratios: [ ]
  f0_condition: false
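# CAM++ speaker/style encoder: 80-dim input features -> 192-dim style embedding.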
style_encoder:
  _target_: modules.campplus.DTDNN.CAMPPlus
  feat_dim: 80
  embedding_size: 192
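# "Narrow" content extractor: HuBERT-large layer-18 SSL features -> ConvNeXt V2
# encoder -> binary spherical quantizer with a 32-code codebook. skip_ssl: true
# presumably skips running the SSL model here so features can be shared with the
# wide extractor below.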
content_extractor_narrow:
  _target_: modules.astral_quantization.default_model.AstralQuantizer
  tokenizer_name: "openai/whisper-small"
  ssl_model_name: "facebook/hubert-large-ll60k"
  ssl_output_layer: 18
  skip_ssl: true
  encoder:
    _target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
    dim: 512
    num_blocks: 12
    intermediate_dim: 1536
    dilation: 1
    input_dim: 1024
  quantizer:
    _target_: modules.astral_quantization.bsq.BinarySphericalQuantize
    codebook_size: 32 # codebook size, must be a power of 2
    dim: 512
    entropy_loss_weight: 0.1
    diversity_gamma: 1.0
    spherical: True
    enable_entropy_loss: True
    soft_entropy_loss: True
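# "Wide" content extractor: same Whisper/HuBERT setup, quantized with a
# 2048-code BSQ codebook; the encoder entry is left unspecified in this file.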
content_extractor_wide:
  _target_: modules.astral_quantization.default_model.AstralQuantizer
  tokenizer_name: "openai/whisper-small"
  ssl_model_name: "facebook/hubert-large-ll60k"
  ssl_output_layer: 18
  encoder:
  quantizer:
    _target_: modules.astral_quantization.bsq.BinarySphericalQuantize
    codebook_size: 2048 # codebook size, must be a power of 2
    dim: 512
    entropy_loss_weight: 0.1
    diversity_gamma: 1.0
    spherical: True
    enable_entropy_loss: True
    soft_entropy_loss: True
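# BigVGAN v2 vocoder (22 kHz, 80-band mels, 256x upsampling) loaded from the
# pretrained NVIDIA checkpoint; custom CUDA kernel disabled.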
vocoder:
  _target_: modules.bigvgan.bigvgan.BigVGAN.from_pretrained
  pretrained_model_name_or_path: "nvidia/bigvgan_v2_22khz_80band_256x"
  use_cuda_kernel: false
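# Minimal usage sketch (not part of this config): one way to build the model
# from this file with Hydra's instantiate API. The config path below is an
# assumption for illustration only.
#
#   from omegaconf import OmegaConf
#   from hydra.utils import instantiate
#
#   cfg = OmegaConf.load("configs/v2/vc_wrapper.yaml")  # hypothetical path to this file
#   model = instantiate(cfg)  # recursively builds VoiceConversionWrapper and nested modules
#   model.eval()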