In [None]:
# Branch (or tag) of the NVIDIA/NeMo GitHub repository that this notebook
# installs NeMo from and downloads example scripts/configs from.
BRANCH = "r1.17.0"

In [None]:
"""
You can run this notebook either locally (if you have all the dependencies and a GPU) or on Google Colab.

Instructions for setting up Colab are as follows:
1. Open a new Python 3 notebook.
2. Import this notebook from GitHub (File -> Upload Notebook -> "GITHUB" tab -> copy/paste GitHub URL)
3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select "GPU" for hardware accelerator)
4. Run this cell to set up dependencies.
"""
# If you're using Google Colab and not running locally, run this cell

# install NeMo
!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]

In [None]:
import os
import wget 
import torch
import pytorch_lightning as pl
from omegaconf import OmegaConf

# Task Description
In this tutorial, we describe how to export NeMo NLP models that use a BERT-based model as their pre-trained model.

## Convert the Megatron-LM Weights to a NeMo File

If you prefer to use the Huggingface BERT models, please skip this section and refer to the `Setting up a NeMo Experiment` section to load a model from `nemo_nlp.modules.get_pretrained_lm_models_list()`.

NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/core/core.html?highlight=nemo%20file#restore) using a `.nemo` file. We can convert the Megatron-LM checkpoint to a `.nemo` file. Let's first download the pretrained model weights and vocabulary file.

In [None]:
from nemo.collections.nlp.modules.common.megatron.megatron_utils import MEGATRON_CONFIG_MAP
import pathlib

PRETRAINED_BERT_MODEL = "megatron-bert-345m-uncased" # specify BERT-like model from MEGATRON_CONFIG_MAP.keys()
nemo_out_path = "qa_pretrained.nemo" # the nemo output file name

checkpoint_url = MEGATRON_CONFIG_MAP[PRETRAINED_BERT_MODEL]['checkpoint']
vocab_url = MEGATRON_CONFIG_MAP[PRETRAINED_BERT_MODEL]['vocab']
checkpoint_filename = pathlib.Path(checkpoint_url).name
vocab_filename = pathlib.Path(vocab_url).name
if not pathlib.Path(checkpoint_filename).exists():
 print('downloading from checkpoint url', checkpoint_url)
 !wget $checkpoint_url
if not pathlib.Path(vocab_filename).exists():
 print('downloading from vocab url', vocab_url)
 !wget $vocab_url

In [None]:
# Directory under which the model configuration file is stored.
WORK_DIR = "WORK_DIR"
os.makedirs(WORK_DIR, exist_ok=True)

# Prepare the model parameters:
# download the model's configuration file from the NeMo repository (branch
# BRANCH) unless a copy is already present in the configs directory.
config_dir = WORK_DIR + '/configs/'
MODEL_CONFIG = "megatron_bert_config.yaml"
os.makedirs(config_dir, exist_ok=True)
if os.path.exists(config_dir + MODEL_CONFIG):
    print('config file already exists')
else:
    print('Downloading config file...')
    wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/language_modeling/conf/' + MODEL_CONFIG, config_dir)

In [None]:
# Load the downloaded model configuration (the path it is read from is printed).
config_path = f'{WORK_DIR}/configs/{MODEL_CONFIG}'
print(config_path)
config = OmegaConf.load(config_path)

# Shorthand for the `model` section; assignments below edit `config` in place.
model_cfg = config.model

# Set to True if you trained the NLP model on NeMo < 1.5.0.
model_cfg.megatron_legacy = True
# Fusion flags: set to True if you want the MegatronLM to NeMo conversion for
# training; set to False to use the converted model at time of export.
model_cfg.bias_gelu_fusion = False
model_cfg.masked_softmax_fusion = False

# Model dimensions, tokenizer and data settings written into the hparams file.
model_cfg.num_layers = 24
model_cfg.hidden_size = 1024
model_cfg.ffn_hidden_size = 4096
model_cfg.num_attention_heads = 16
model_cfg.tokenizer.vocab_file = vocab_filename
# Change this to BertWordPieceCase if you are using a cased pretrained model.
model_cfg.tokenizer.type = 'BertWordPieceLowerCase'
model_cfg.tensor_model_parallel_size = 1
model_cfg.data.data_prefix = ''
model_cfg.max_position_embeddings = 512
model_cfg.data.seq_length = 512

# Nest the model section as cfg.cfg and dump that sub-tree to hparams.yaml.
config.cfg = {}
config.cfg.cfg = model_cfg
with open('hparams.yaml', 'w') as hparams_file:
    hparams_file.write(OmegaConf.to_yaml(config.cfg))

# For a legacy model, provide the path to the pretrained .pt file you used during
# training on NeMo < 1.5.0; otherwise the previously set checkpoint name is kept.
if model_cfg.megatron_legacy:
    checkpoint_filename = "model_optim_rng_ca.pt"
print(checkpoint_filename)

In [None]:
import os
PWD = os.getcwd()
wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py')
!python -m torch.distributed.run --nproc_per_node=1 megatron_lm_ckpt_to_nemo.py --checkpoint_folder=$PWD --checkpoint_name=$checkpoint_filename --hparams_file=$PWD/hparams.yaml --nemo_file_path=$PWD/$nemo_out_path --model_type=bert --tensor_model_parallel_size=1

# Legacy NLP Bert based model conversion

Step 1: Convert the legacy NeMo checkpoint to a checkpoint that is currently supported by NeMo

Step 2: Use the converted model from step 1 to export the NeMo file to the required format

In [None]:
# Download the legacy-checkpoint porting script and the export script from the
# NeMo repository (branch BRANCH) into the current directory. Each script is
# fetched only if it is not already present, so re-running this cell does not
# download additional copies.
for script_repo_path in (
    'scripts/nemo_legacy_import/nlp_checkpoint_port.py',
    'scripts/export.py',
):
    if not os.path.exists(os.path.basename(script_repo_path)):
        wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/{script_repo_path}')

In [None]:
# Path to your model trained on NeMo < 1.5.
legacy_nemo_file_path = "/NeMo/megatron_multiqa.nemo"

# Where the ported (currently supported) .nemo checkpoint is written.
nemo_converted_out_path = "converted_megatron_multiqa.nemo"

# Give the absolute path of the model you obtained using megatron_lm_ckpt_to_nemo.
megatron_absolute_language_model_path = "/NeMo/tutorials/nlp/qa_pretrained.nemo"

# Where the exported ONNX model is written.
onnx_export_out_path = "onnx_megatron_multiqa.onnx"

In [None]:
# Port the legacy .nemo checkpoint, pointing the script at the converted
# Megatron language model. The shell exit status is the cell's displayed value.
port_command = (
    "python nlp_checkpoint_port.py "
    f"{legacy_nemo_file_path} {nemo_converted_out_path} "
    "--megatron-legacy=True "
    f"--megatron-checkpoint {megatron_absolute_language_model_path}"
)
os.system(port_command)

In [None]:
# Export the ported checkpoint to ONNX. The shell exit status is the cell's
# displayed value.
export_command = (
    "python export.py "
    f"{nemo_converted_out_path} {onnx_export_out_path} "
    "--autocast --runtime-check"
)
os.system(export_command)

# Convert an NLP model with a BERT-based pre-trained model trained on NeMo >= 1.5.0

For models trained on NeMo >= 1.5.0, you only need to run the export script; skip the legacy conversion part.

In [None]:
# Fill in both paths before running the export cell.
# Path to the .nemo model trained on NeMo >= 1.5.0 that should be exported.
nemo_file_path = ""
# Path the exported ONNX model should be written to. (The original assignment
# had no right-hand side, which is a SyntaxError.)
onnx_export_out_path = ""

In [None]:
python export.py $nemo_converted_out_path $onnx_export_out_path --autocast --runtime-check