name: megatron_t5_finetune_eval

trainer:
  devices: 1
  num_nodes: 1
  accelerator: gpu
  precision: 16
  logger: False # logger provided by exp_manager
  enable_checkpointing: False
  replace_sampler_ddp: False
  benchmark: False

exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: megatron_t5_finetune_eval
  create_checkpoint_callback: False

model:
  restore_from_path: null # Path to a trained T5 .nemo file
  pretrained_checkpoint:
    checkpoint_dir: null # Path to a folder that contains a .ckpt file
    checkpoint_name: null # Name of the .ckpt file within the checkpoint_dir.
    hparams_file: null # Path to a .yaml file that contains the hyperparameters of the checkpoint.
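  # Note (illustrative, not enforced by this file): typically either `restore_from_path`
  # (a packaged .nemo file) or the `pretrained_checkpoint` fields above (a .ckpt plus its
  # hparams .yaml) is set, not both. The values below are purely hypothetical examples:
  #   checkpoint_dir: /results/megatron_t5/checkpoints
  #   checkpoint_name: megatron_t5--last.ckpt
  #   hparams_file: /results/megatron_t5/hparams.yaml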
  gradient_as_bucket_view: True # Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
  megatron_amp_O2: False # Enable O2-level automatic mixed precision for Megatron (optimizer keeps FP32 main parameters).

  data:
    validation_ds:
      src_file_name: null # Path to the txt file corresponding to the source data.
      tgt_file_name: null # Path to the txt file corresponding to the target data.
      names: null # If src_file_name/tgt_file_name are lists (ListConfigs), the corresponding name here is used as the label when logging metrics for each dataset.
      global_batch_size: 64 # Total batch size across all GPUs (and any gradient accumulation).
      micro_batch_size: 64 # Per-GPU batch size for a single forward/backward pass.
      shuffle: False
      num_workers: 0
      pin_memory: True
      max_src_seq_length: 512
      max_tgt_seq_length: 128
      drop_last: False # TODO: Figure out if there is a way to avoid dropping last.
      write_predictions_to_file: False
      output_file_path_prefix: null # Prefix of the file to write predictions to.
      replace_bos_with_pad: False # Replaces bos with pad for both the encoder and decoder. This is necessary when using Google's T5 checkpoints.
      add_bos_to_input: False # Adds bos to the input sequence.
      add_eos_to_input: False # Adds eos to the input sequence.

      metric:
        name: "exact_string_match" # Name of the evaluation metric to use.
        average: micro # How to average the metric over the dataset. Options: ['macro', 'micro']. Only used for metrics that support averaging (e.g. 'F1', 'accuracy'); refer to torchmetrics for which metrics support this.
        num_classes: null # Number of classes for the metric. Only used for class-based metrics such as 'F1', 'accuracy', and 'average_precision'; refer to torchmetrics for which metrics support this.
        class_labels: null # If the targets in your dataset are strings rather than integers/floats, provide a list of class labels (size = num_classes) so they can be converted to integer categories when computing the metric.
        labels_are_strings: True # NOTE: Only required to properly handle metrics like f1, accuracy, average_precision, etc. Has no effect on exact_string_match.
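
# --- Usage notes (illustrative only; script name and paths below are assumptions) ---
# This config is typically consumed through Hydra by a NeMo eval script, e.g.
# something like megatron_t5_seq2seq_eval.py in NeMo's examples. A minimal sketch:
#
#   python megatron_t5_seq2seq_eval.py \
#     model.restore_from_path=/path/to/finetuned_t5.nemo \
#     model.data.validation_ds.src_file_name=/path/to/val.src.txt \
#     model.data.validation_ds.tgt_file_name=/path/to/val.tgt.txt
#
# Several validation sets can be evaluated together by passing lists for the
# src/tgt file names plus matching labels in `names` (hypothetical paths):
#
#   model.data.validation_ds.src_file_name=[/data/a.src,/data/b.src] \
#   model.data.validation_ds.tgt_file_name=[/data/a.tgt,/data/b.tgt] \
#   model.data.validation_ds.names=[task_a,task_b]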