# Top-level name of this configuration.
name: fine_tune_retro

# Training-loop settings: hardware, precision, step limits, logging and
# validation cadence, gradient accumulation and clipping.
# NOTE(review): the key names look like PyTorch Lightning Trainer arguments;
# presumably this mapping is handed to the Trainer - confirm in the launcher.
trainer:
  devices: 2
  num_nodes: 1
  accelerator: gpu
  precision: 16
  # NOTE(review): logger and checkpointing are switched off here while
  # exp_manager enables its own checkpoint callback - presumably exp_manager
  # provides both; confirm.
  logger: False
  enable_checkpointing: False
  replace_sampler_ddp: False
  # NOTE(review): with max_epochs at -1, max_steps looks like the intended
  # stop condition - confirm.
  max_epochs: -1
  max_steps: 100000
  log_every_n_steps: 10
  val_check_interval: 100
  limit_val_batches: null
  limit_test_batches: null
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0

# Experiment bookkeeping: log directories, optional Weights & Biases logging,
# resume behaviour and checkpoint-callback parameters.
# NOTE(review): nesting below was reconstructed from a flattened copy of this
# file; verify it against the consumer's schema.
exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: megatron_retro
  create_wandb_logger: False
  wandb_logger_kwargs:
    project: null
    name: null
  resume_if_exists: True
  resume_ignore_no_checkpoint: True
  create_checkpoint_callback: True
  checkpoint_callback_params:
    # Checkpoints are ranked on val_loss (lower is better); the best 10 are kept.
    monitor: val_loss
    save_top_k: 10
    mode: min
    always_save_nemo: False
    # Checkpoint file-name template with val_loss, step and consumed_samples
    # placeholders.
    filename: 'megatron_retro--{val_loss:.2f}-{step}-{consumed_samples}'
    # Interpolation over model.tensor_model_parallel_size and
    # model.pipeline_model_parallel_size.
    # NOTE(review): relies on a `multiply` resolver being registered by the
    # consuming application - confirm.
    model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}}

# Model settings: parallelism, batch size, tokenizer, mixed-precision scaling,
# seed, restore path, datasets and optimizer.
# NOTE(review): nesting below (tokenizer, data, optim) was reconstructed from a
# flattened copy of this file; verify it against the consumer's schema.
model:
  # Model parallelism; both values are referenced by
  # exp_manager.checkpoint_callback_params.model_parallel_size.
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1

  micro_batch_size: 4
  megatron_amp_O2: False

  tokenizer:
    library: 'megatron'
    type: 'GPT2BPETokenizer'
    model: null
    vocab_file: null
    merge_file: null
    delimiter: null

  gradient_as_bucket_view: True

  # Mixed-precision loss scaling.
  native_amp_init_scale: 4294967296  # 2 ** 32
  native_amp_growth_interval: 1000
  fp16_lm_cross_entropy: False

  seed: 1234

  restore_path: null

  # Datasets: the three splits share the same settings and differ only in the
  # file they read.
  # NOTE(review): `???` is presumably OmegaConf's mandatory-value marker, i.e.
  # each file_name must be supplied at launch - confirm.
  data:
    train_ds:
      file_name: ???
      answer_only_loss: True
      seq_length: 128
      add_bos: True
      add_eos: True
      seed: 1234
      neighbors: 20
    val_ds:
      file_name: ???
      answer_only_loss: True
      seq_length: 128
      add_bos: True
      add_eos: True
      seed: 1234
      neighbors: 20
    test_ds:
      file_name: ???
      answer_only_loss: True
      seq_length: 128
      add_bos: True
      add_eos: True
      seed: 1234
      neighbors: 20

  # NOTE(review): optim is placed under model here - confirm it is not meant
  # to be a top-level section.
  optim:
    name: fused_adam
    # Written with an explicit mantissa dot (1.0e-4, not 1e-4) so YAML 1.1
    # loaders such as PyYAML resolve it as a float instead of a string.
    lr: 1.0e-4
    weight_decay: 0.01
    betas:
      - 0.9
      - 0.98
    sched:
      name: CosineAnnealing
      warmup_steps: 500
      constant_steps: 50000
      # Same explicit-dot float form as lr above.
      min_lr: 1.0e-5