|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
import pytorch_lightning as pl |
|
import torch |
|
from utils import get_model |
|
|
|
from nemo.collections.common.callbacks import LogEpochTimeCallback |
|
from nemo.collections.tts.models.base import G2PModel |
|
from nemo.core.config import hydra_runner |
|
from nemo.utils import logging, model_utils |
|
from nemo.utils.exp_manager import exp_manager |
|
|
|
""" |
|
This script supports training of G2PModels |
|
(for T5G2PModel use g2p_t5.yaml, for CTCG2PModel use either g2p_conformer.yaml or g2p_t5_ctc.yaml) |
|
|
|
# Training T5G2PModel and evaluation at the end of training: |
|
python examples/text_processing/g2p/g2p_train_and_evaluate.py \ |
|
# (Optional: --config-path=<Path to dir of configs> --config-name=<name of config without .yaml>) \ |
|
model.train_ds.manifest_filepath="<Path to manifest file>" \ |
|
model.validation_ds.manifest_filepath="<Path to manifest file>" \ |
|
model.test_ds.manifest_filepath="<Path to manifest file>" \ |
|
trainer.devices=1 \ |
|
do_training=True \ |
|
do_testing=True |
|
|
|
Example of the config file: NeMo/examples/tts/g2p/conf/g2p_t5.yaml |
|
|
|
# Training Conformer-G2P Model and evaluation at the end of training: |
|
python examples/text_processing/g2p/g2p_train_and_evaluate.py \ |
|
# (Optional: --config-path=<Path to dir of configs> --config-name=<name of config without .yaml>) \ |
|
model.train_ds.manifest_filepath="<Path to manifest file>" \ |
|
model.validation_ds.manifest_filepath="<Path to manifest file>" \ |
|
model.test_ds.manifest_filepath="<Path to manifest file>" \ |
|
model.tokenizer.dir=<Path to pretrained tokenizer> \ |
|
trainer.devices=1 \ |
|
do_training=True \ |
|
do_testing=True |
|
|
|
Example of the config file: NeMo/examples/text_processing/g2p/conf/g2p_conformer_ctc.yaml |
|
|
|
# Run evaluation of the pretrained model: |
|
python examples/text_processing/g2p/g2p_train_and_evaluate.py \ |
|
# (Optional: --config-path=<Path to dir of configs> --config-name=<name of config without .yaml>) \ |
|
pretrained_model="<Path to .nemo file or pretrained model name from list_available_models()>" \ |
|
model.test_ds.manifest_filepath="<Path to manifest file>" \ |
|
trainer.devices=1 \ |
|
do_training=False \ |
|
do_testing=True |
|
""" |
|
|
|
|
|
@hydra_runner(config_path="conf", config_name="g2p_t5")
def main(cfg):
    """Train and/or evaluate a G2P model, driven entirely by the hydra config.

    Config switches:
        cfg.do_training: build a model via ``get_model(cfg, trainer)`` and fit it.
        cfg.do_testing: evaluate on ``cfg.model.test_ds`` with a fresh
            single-device trainer, using either the freshly trained model or one
            restored from ``cfg.pretrained_model`` (.nemo path or pretrained name).

    Raises:
        ValueError: if testing is requested without a trained model and
            ``cfg.pretrained_model`` is missing or not resolvable.
    """
    trainer = pl.Trainer(**cfg.trainer)
    exp_manager(trainer, cfg.get("exp_manager", None))

    g2p_model = None
    if cfg.do_training:
        g2p_model = get_model(cfg, trainer)
        # Track learning rate and per-epoch wall time alongside training metrics.
        lr_logger = pl.callbacks.LearningRateMonitor()
        epoch_time_logger = LogEpochTimeCallback()
        trainer.callbacks.extend([lr_logger, epoch_time_logger])
        trainer.fit(g2p_model)

    if cfg.do_testing:
        logging.info(
            'During evaluation/testing, it is currently advisable to construct a new Trainer with single GPU and \
no DDP to obtain accurate results'
        )

        # Evaluate on a single device: GPU 0 when available, otherwise one CPU process.
        if torch.cuda.is_available():
            device = [0]
            accelerator = 'gpu'
        else:
            device = 1
            accelerator = 'cpu'

        map_location = torch.device('cuda:{}'.format(device[0]) if accelerator == 'gpu' else 'cpu')
        trainer = pl.Trainer(devices=device, accelerator=accelerator, logger=False, enable_checkpointing=False)

        if g2p_model is None:
            # No model was trained in this run: restore one from a checkpoint or
            # a pretrained-model name. Fetch with .get() and fail early with a
            # clear error instead of a TypeError from os.path.exists(None).
            pretrained_model = cfg.get("pretrained_model", None)
            if pretrained_model is None:
                raise ValueError(
                    f'Provide path to the pre-trained .nemo checkpoint or choose from {G2PModel.list_available_models()}'
                )
            if os.path.exists(pretrained_model):
                # Restore via the concrete class recorded in the checkpoint config,
                # so both T5- and Conformer-based G2P models are supported.
                model_cfg = G2PModel.restore_from(restore_path=pretrained_model, return_config=True)
                classpath = model_cfg.target
                imported_class = model_utils.import_class_by_path(classpath)
                logging.info(f"Restoring g2p_model : {imported_class.__name__}")
                g2p_model = imported_class.restore_from(restore_path=pretrained_model, map_location=map_location)
                model_name = os.path.splitext(os.path.basename(pretrained_model))[0]
                logging.info(f"Restored {model_name} g2p_model from {pretrained_model}.")
            elif pretrained_model in G2PModel.get_available_model_names():
                # Not a local file — try NGC/cloud pretrained model names.
                g2p_model = G2PModel.from_pretrained(pretrained_model, map_location=map_location)
            else:
                raise ValueError(
                    f'Provide path to the pre-trained .nemo checkpoint or choose from {G2PModel.list_available_models()}'
                )

        if hasattr(cfg.model, "test_ds") and cfg.model.test_ds.manifest_filepath is not None:
            g2p_model.setup_multiple_test_data(cfg.model.test_ds)
            if g2p_model.prepare_test(trainer):
                trainer.test(g2p_model)
|
|
|
|
|
if __name__ == '__main__':
    # `cfg` is injected by the @hydra_runner decorator; no arguments are passed here.
    main()
|
|