# Configuration for the `megatron_t0` run: trainer settings, experiment
# manager (logging / checkpointing), and the model with its data and optimizer.
#
# `${...}` values are interpolations and `???` marks a value with no default
# that must be supplied by the user (presumably OmegaConf/Hydra syntax —
# confirm against the loader in use).
#
# NOTE(review): nesting follows the interpolation paths used in this file
# (`model.data.validation_ds.metric.name`,
# `exp_manager.checkpoint_callback_params.monitor`,
# `model.tensor_model_parallel_size`, `data.train_ds.*`). Where no path pins
# the depth of a key, the placement is marked with its own NOTE(review).

name: megatron_t0

trainer:
  devices: 1
  num_nodes: 1
  accelerator: gpu
  precision: 16
  # Logging and checkpointing are switched off here; exp_manager below has its
  # own logger/checkpoint-callback switches (presumably it takes over — confirm).
  logger: False
  enable_checkpointing: False
  replace_sampler_ddp: False
  # Presumably -1 means "no epoch limit", leaving max_steps as the effective
  # bound — confirm against the trainer's documentation.
  max_epochs: -1
  max_steps: 100000
  log_every_n_steps: 10
  val_check_interval: 300
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0

exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: megatron_t0
  create_wandb_logger: False
  # NOTE(review): assumes wandb_logger_kwargs holds only `project` and `name`.
  wandb_logger_kwargs:
    project: null
    name: null
  resume_if_exists: True
  resume_ignore_no_checkpoint: True
  create_checkpoint_callback: True
  # NOTE(review): `monitor` is pinned here by the interpolation in `filename`;
  # the remaining keys are assumed to belong to this mapping as well.
  checkpoint_callback_params:
    # With the default metric name below this resolves to
    # `validation_exact_string_match`.
    monitor: validation_${model.data.validation_ds.metric.name}
    save_top_k: 10
    mode: max
    always_save_nemo: False
    filename: 'megatron_t0--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}'
    model_parallel_size: ${model.tensor_model_parallel_size}
    save_best_model: True

model:
  restore_from_path: null
  # NOTE(review): assumes the three checkpoint_* / hparams_file keys are the
  # full content of pretrained_checkpoint.
  pretrained_checkpoint:
    checkpoint_dir: null
    checkpoint_name: null
    hparams_file: null
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1
  pipeline_model_parallel_split_rank: 0
  gradient_as_bucket_view: True
  megatron_amp_O2: False
  resume_from_checkpoint: null
  hidden_dropout: 0.1
  attention_dropout: 0.1

  data:
    train_ds:
      file_names: ???  # required, no default
      global_batch_size: 128
      micro_batch_size: 16
      shuffle: True
      num_workers: 8
      pin_memory: True
      max_src_seq_length: 512
      max_tgt_seq_length: 512
      drop_last: True
      concat_sampling_probabilities: ???  # required, no default
      replace_bos_with_pad: False
      add_bos_to_input: False
      add_eos_to_input: False
      seed: 1234

    validation_ds:
      file_names: ???  # required, no default
      names: null
      global_batch_size: 16
      micro_batch_size: 16
      shuffle: False
      num_workers: 0
      pin_memory: True
      max_src_seq_length: 512
      max_tgt_seq_length: 512
      drop_last: False
      write_predictions_to_file: False
      output_file_path_prefix: null
      metric:
        # Also feeds exp_manager.checkpoint_callback_params.monitor.
        name: "exact_string_match"
        average: null
        num_classes: null
      # Mirror the train_ds token-handling switches.
      # NOTE(review): these paths are rooted at `data`, not `model.data` as in
      # exp_manager above — presumably they are resolved against the model
      # sub-config; confirm before changing either form.
      replace_bos_with_pad: ${data.train_ds.replace_bos_with_pad}
      add_bos_to_input: ${data.train_ds.add_bos_to_input}
      add_eos_to_input: ${data.train_ds.add_eos_to_input}
      seed: 1234

  # NOTE(review): `optim` is placed under `model`; no interpolation in this
  # file pins its depth — confirm against the consumer's schema.
  optim:
    name: fused_adam
    # Written with a decimal point so YAML 1.1 loaders (e.g. plain PyYAML) also
    # read it as a float; a bare `5e-6` loads as a string there.
    lr: 5.0e-6
    weight_decay: 0.0