# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import torch.multiprocessing as mp
from apex.transformer import parallel_state
from omegaconf import OmegaConf
from omegaconf.omegaconf import open_dict
from pytorch_lightning.trainer.trainer import Trainer
from nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model import MegatronGPTAdapterLearningModel
from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy
from nemo.core.config import hydra_runner
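
# The "spawn" start method is used because CUDA cannot be re-initialized in a forked subprocess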
mp.set_start_method("spawn", force=True)

"""
This is the script to run an Adapter Tuned GPT Model for text generation.

Usage:
    Assume the model has TP=1, PP=1 (tensor and pipeline model parallel size 1) in the following use cases.
    a. run greedy inference using a base GPT .nemo file and an adapter .nemo file:
        python megatron_gpt_adapter_eval.py \
            gpt_model_file=PATH TO GPT MODEL NEMO FILE \
            adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_adapter_tuning.py) \
            data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS] \
            pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS
"""

if not torch.cuda.is_available():
    raise EnvironmentError("GPU is needed for the inference")


@hydra_runner(config_path="conf", config_name="megatron_gpt_adapter_inference")
def main(cfg) -> None:
    # trainer required for restoring model parallel models
    trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer)

    # An adapter checkpoint must be provided in the config
    if cfg.get("adapter_model_file", None) is not None:
        # Update the frozen GPT model path in case it has changed since adapter tuning
        adapter_tuning_cfg = MegatronGPTAdapterLearningModel.restore_from(
            cfg.adapter_model_file, trainer=trainer, return_config=True
        )
        with open_dict(adapter_tuning_cfg):
            adapter_tuning_cfg.language_model_path = cfg.gpt_model_file

        # Now load the adapter model on top of the frozen GPT base model
        model = MegatronGPTAdapterLearningModel.restore_from(
            restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg
        )
    else:
        raise NotImplementedError(
            "This script is meant for inference from an Adapter Tuned GPT Model; "
            "for inference from a plain Megatron GPT model, refer to ../megatron_gpt_eval.py"
        )

    model.freeze()

    # Turn off activations_checkpoint_method for inference; the encoder lives under either
    # `model.model` or `model.frozen_model.model` depending on the model class, so try both
    # paths and ignore whichever one is absent
    try:
        model.model.language_model.encoder.activations_checkpoint_method = None
    except AttributeError:
        pass

    try:
        model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None
    except AttributeError:
        pass

    # Leave room in the encoder sequence for the tokens that will be generated
    max_input_length = model.frozen_model.cfg.encoder_seq_length - cfg.inference.tokens_to_generate

    # If the distributed model-parallel state has not been initialized yet, launch a
    # no-op function through the strategy so the process group gets set up
    if parallel_state.is_unitialized():

        def dummy():
            return

        if trainer.strategy.launcher is not None:
            trainer.strategy.launcher.launch(dummy, trainer=trainer)
        trainer.strategy.setup_environment()

    _, dataloader = model.build_virtual_prompt_dataset(
        data=cfg.data_paths,
        batch_size=cfg.get("batch_size", 1),
        max_seq_length=max_input_length,
        min_seq_length=model.cfg.data.get('min_seq_length', 1),
        add_bos=cfg.inference.add_BOS,
        add_eos=False,
        for_train=False,
        tokens_to_generate=cfg.inference.tokens_to_generate,
        drop_last=False,
        shuffle=False,
        num_workers=cfg.get("num_workers", 1),
    )
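    # cfg.inference (sampling settings such as tokens_to_generate and add_BOS) is converted to a
    # plain dict and attached to the model so the generation step of trainer.predict() can read it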
    config = OmegaConf.to_container(cfg.inference)
    model.set_inference_config(config)
    response = trainer.predict(model, dataloader)
print("***************************")
if cfg.pred_file_path is not None:
with open(cfg.pred_file_path, "w", encoding="utf-8") as f:
for batch in response:
for sentence in batch['sentences']:
s = ' '.join(sentence.split('\n'))
f.write(s + "\n")
print("predictions saved to {}".format(cfg.pred_file_path))
else:
print(response)
print("***************************")


if __name__ == '__main__':
    main()  # noqa pylint: disable=no-value-for-parameter