Spaces:
Paused
Paused
{
  "base_config": "config/base.json",
  "model_type": "VC",
  "dataset": ["mls"],
  "model": {
    "reference_encoder": {
      "encoder_layer": 6,
      "encoder_hidden": 512,
      "encoder_head": 8,
      "conv_filter_size": 2048,
      "conv_kernel_size": 9,
      "encoder_dropout": 0.2,
      "use_skip_connection": false,
      "use_new_ffn": true,
      "ref_in_dim": 80,
      "ref_out_dim": 512,
      "use_query_emb": true,
      "num_query_emb": 32
    },
    "diffusion": {
      "beta_min": 0.05,
      "beta_max": 20,
      "sigma": 1.0,
      "noise_factor": 1.0,
      "ode_solve_method": "euler",
      "diff_model_type": "WaveNet",
      "diff_wavenet": {
        "input_size": 80,
        "hidden_size": 512,
        "out_size": 80,
        "num_layers": 47,
        "cross_attn_per_layer": 3,
        "dilation_cycle": 2,
        "attn_head": 8,
        "drop_out": 0.2
      }
    },
    "prior_encoder": {
      "encoder_layer": 6,
      "encoder_hidden": 512,
      "encoder_head": 8,
      "conv_filter_size": 2048,
      "conv_kernel_size": 9,
      "encoder_dropout": 0.2,
      "use_skip_connection": false,
      "use_new_ffn": true,
      "vocab_size": 256,
      "cond_dim": 512,
      "duration_predictor": {
        "input_size": 512,
        "filter_size": 512,
        "kernel_size": 3,
        "conv_layers": 30,
        "cross_attn_per_layer": 3,
        "attn_head": 8,
        "drop_out": 0.2
      },
      "pitch_predictor": {
        "input_size": 512,
        "filter_size": 512,
        "kernel_size": 5,
        "conv_layers": 30,
        "cross_attn_per_layer": 3,
        "attn_head": 8,
        "drop_out": 0.5
      },
      "pitch_min": 50,
      "pitch_max": 1100,
      "pitch_bins_num": 512
    },
    "vc_feature": {
      "content_feature_dim": 768,
      "hidden_dim": 512
    }
  }
}