# NeMo/examples/tts/conf/spectrogram-enhancer.yaml
name: "spectrogram-enhancer"
model:
n_bands: 80
latent_dim: 192
style_depth: 4
network_capacity: 16
mixed_prob: 0.9
fmap_max: 192
  start_from_zero: true # might give better results on downstream tasks

  generator:
    _target_: "nemo.collections.tts.modules.spectrogram_enhancer.Generator"
    n_bands: ${model.n_bands}
    latent_dim: ${model.latent_dim}
    network_capacity: ${model.network_capacity}
    style_depth: ${model.style_depth}
    fmap_max: ${model.fmap_max}

  discriminator:
    _target_: "nemo.collections.tts.modules.spectrogram_enhancer.Discriminator"
    n_bands: ${model.n_bands}
    network_capacity: ${model.network_capacity}
    fmap_max: ${model.fmap_max}
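
  # The two blocks above are plain Hydra object configs: anything with a
  # _target_ key is built via hydra.utils.instantiate. A minimal sketch of
  # how they become modules (illustrative, not the actual NeMo training
  # code; `cfg` is assumed to be this file loaded as an OmegaConf config):
  #   from hydra.utils import instantiate
  #   generator = instantiate(cfg.model.generator)
  #   discriminator = instantiate(cfg.model.discriminator)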

  consistency_loss_weight: 10.0 # typically in [1.0, 100.0]; lower for clean datasets, higher for noisy ones
  gradient_penalty_loss_weight: 10.0 # read the StyleGAN papers before changing this
  gradient_penalty_loss_every_n_steps: 4

  # Spectrogram value range, calculated over your dataset with matching STFT parameters.
  # Needed for treating spectrograms as images with pixel values around [0, 1].
  # For LibriTTS, you can try [-13.18, 4.78].
  spectrogram_min_value: ???
  spectrogram_max_value: ???
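
  # A minimal sketch for estimating these two values (an assumption, not a
  # NeMo utility; sr/n_fft/hop_length/n_mels below are placeholders and must
  # match the STFT parameters of the TTS model whose spectrograms you enhance):
  #   import librosa
  #   import numpy as np
  #   mins, maxs = [], []
  #   for path in audio_paths:  # wav files of your dataset
  #       audio, sr = librosa.load(path, sr=22050)
  #       mel = librosa.feature.melspectrogram(
  #           y=audio, sr=sr, n_fft=1024, hop_length=256, n_mels=80)
  #       log_mel = np.log(np.clip(mel, a_min=1e-5, a_max=None))
  #       mins.append(log_mel.min())
  #       maxs.append(log_mel.max())
  #   print(min(mins), max(maxs))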

  train_ds:
    dataset:
      _target_: "nemo.collections.tts.data.tts_dataset.PairedRealFakeSpectrogramsDataset"
      manifest_filepath: ???
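      # Each manifest line should pair a ground-truth ("real") spectrogram
      # with one synthesized by your TTS model ("fake"). A hypothetical line
      # (the exact field names are an assumption; check
      # PairedRealFakeSpectrogramsDataset in your NeMo version):
      #   {"mel_gt_filepath": "real/0001.npy", "mel_filepath": "fake/0001.npy"}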
    dataloader_params:
      drop_last: true
      shuffle: true
      batch_size: 8
      num_workers: 2

  generator_opt:
    _target_: torch.optim.Adam
    lr: 2e-4
    betas: [0.5, 0.9]

  discriminator_opt:
    _target_: torch.optim.Adam
    lr: 2e-4
    betas: [0.5, 0.9]

trainer:
  num_nodes: 1
  devices: 1
  accelerator: gpu
  strategy: ddp
  precision: 32
  max_epochs: 4
  accumulate_grad_batches: 1
  gradient_clip_val: 1000.0
  log_every_n_steps: 1000
  # we don't really need validation
  check_val_every_n_epoch: null
  limit_val_batches: 0.0
  benchmark: false
  # provided by exp_manager
  enable_checkpointing: false
  logger: false

exp_manager:
  exp_dir: ""
  name: ${name}
  create_tensorboard_logger: true
  create_checkpoint_callback: true
  # There is no good stopping rule, so we keep every checkpoint;
  # tune trainer.max_epochs to the size of your dataset to avoid wasting disk space.
  checkpoint_callback_params:
    every_n_epochs: 1
    save_on_train_epoch_end: true
    save_top_k: -1
    monitor: "g_loss"
  resume_if_exists: false
  resume_ignore_no_checkpoint: false
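
# A hypothetical launch command (the script path assumes the matching
# examples/tts/spectrogram_enhancer.py training script in the NeMo repo;
# paths and range values below are placeholders):
#   python examples/tts/spectrogram_enhancer.py \
#       --config-path=conf --config-name=spectrogram-enhancer \
#       model.spectrogram_min_value=-13.18 \
#       model.spectrogram_max_value=4.78 \
#       model.train_ds.dataset.manifest_filepath=/path/to/manifest.json \
#       exp_manager.exp_dir=/path/to/experiments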