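# Config for p-tuning a pretrained Megatron-T5 model on SQuAD with NeMo prompt learning.
# Minimal launch sketch, assuming the standard NeMo example script
# (examples/nlp/language_modeling/megatron_t5_prompt_learning.py in NeMo 1.x; verify the script
# name and config path for your version):
#
#   python megatron_t5_prompt_learning.py \
#       --config-name=p_tuning_squad_t5 \
#       model.language_model_path=/path/to/pretrained_megatron_t5.nemo
#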
name: p_tuning_squad_t5

trainer:
  devices: 1
  accelerator: gpu
  num_nodes: 1
  precision: 16
  logger: False 
  enable_checkpointing: False
  replace_sampler_ddp: False
  max_epochs: 10
  max_steps: -1
  log_every_n_steps: 10
  val_check_interval: 1.0
  gradient_clip_val: 1.0
  resume_from_checkpoint: null 

exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: ${name}
  create_wandb_logger: False
  wandb_logger_kwargs:
    project: PromptLearning-T5
    name: ${name}
  resume_if_exists: True
  resume_ignore_no_checkpoint: True
  create_checkpoint_callback: True
  checkpoint_callback_params:
    monitor: val_loss
    save_top_k: 2
    mode: min
    save_nemo_on_train_end: False # Should be False; the correct prompt learning model file is saved at model.nemo_path set below
    filename: "megatron_t5_prompt_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}"
    model_parallel_size: ${model.tensor_model_parallel_size}
    save_best_model: True
  create_early_stopping_callback: True
  early_stopping_callback_params:
    monitor: "val_loss"
    mode: "min"
    min_delta: 0.001
    patience: 10
    verbose: True
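  # With explicit_log_dir and exp_dir left null, exp_manager writes logs and checkpoints under
  # its default experiments directory (./nemo_experiments/${name}/... in stock NeMo; assumption,
  # verify for your version).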

model:
  seed: 1234
  nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved
  virtual_prompt_style: "p-tuning" # one of 'prompt-tuning', 'p-tuning', or 'inference'
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1 
  global_batch_size: 8 
  micro_batch_size: 8 # global_batch_size = micro_batch_size * data_parallel_size * gradient accumulation steps; with a single GPU and no accumulation the two are equal
  validation_global_batch_size: ${model.global_batch_size}
  validation_micro_batch_size: ${model.micro_batch_size}
  validation_drop_last: False
  report_validation_accuracy: False
  
  restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with
  language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required
  save_nemo_on_validation_end: True # Saves an inference-ready .nemo file every time a checkpoint is saved during training.
  existing_tasks: []
  new_tasks: ["squad"] 
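  # Continued-training sketch (an assumption based on the restore_path comment above): to add a
  # task to an already p-tuned model, point restore_path at that .nemo file, list its previously
  # trained tasks under existing_tasks, and put only the task(s) to train now under new_tasks.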


  task_templates: 
  - taskname: "squad" 
    prompt_template: "<|VIRTUAL_PROMPT_0|> {context} {question} {answer}" 
    total_virtual_tokens: 100
    virtual_token_splits: [100] 
    truncate_field: context
    answer_field: answer
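  # Expected dataset format (illustrative values, not from the original config): each line of the
  # train/validation JSONL files is a JSON object whose keys match the prompt_template fields
  # above plus "taskname", e.g.
  #   {"taskname": "squad", "context": "...", "question": "...", "answer": "..."}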

  p_tuning: # P-tuning specific params, used when virtual_prompt_style is "p-tuning"
    encoder_type: "mlp" # Either "mlp" or "lstm"; mlp is the default
    num_layers: 2 # 2 recommended for MLP (must be at least 2), 1 recommended for LSTM
    dropout: 0.0

  prompt_tuning: # Prompt-tuning specific params, used when virtual_prompt_style is "prompt-tuning"
    new_prompt_init_methods: ['text'] # List of 'text' or 'random', one entry per task listed in new_tasks
    new_prompt_init_text: ['some init text goes here'] # Init text for each task whose init method is 'text', or None when the init method is 'random'

  data:
    train_ds: ["data/squad_train.jsonl"]
    validation_ds: ["data/squad_val.jsonl"]
    add_eos: true
    add_bos: false
    decoder_starts_with_pad: False
    add_eos_to_decoder_output: True
    add_sentinel_to_input: True
    ul2_prompt_token: null # <extra_id_s>, <extra_id_r>, <extra_id_x>
    shuffle: true
    num_workers: 4
    pin_memory: true

  optim:
    name: fused_adam
    lr: 1e-4
    weight_decay: 0.01 
    betas: 
    - 0.9
    - 0.98
    sched:
      name: CosineAnnealing
      warmup_steps: 50
      constant_steps: 0
      min_lr: 0.0
      monitor: val_loss
      reduce_on_plateau: false
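
# Any value in this config can be overridden on the command line with Hydra dot notation,
# e.g. (sketch): trainer.max_epochs=5 model.global_batch_size=16 model.optim.lr=5e-5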