import os
from dataclasses import dataclass, is_dataclass
from typing import Optional

import pytorch_lightning as pl
import torch
from omegaconf import OmegaConf
from utils import get_metrics

from nemo.collections.tts.models.base import G2PModel
from nemo.core.config import hydra_runner
from nemo.utils import logging

""" |
|
python g2p_inference.py \ |
|
pretrained_model=<Path to .nemo file or pretrained model name for G2PModel from list_available_models()>" \ |
|
manifest_filepath="<Path to .json manifest>" \ |
|
output_file="<Path to .json manifest to save prediction>" \ |
|
batch_size=32 \ |
|
num_workers=4 \ |
|
pred_field=pred_text |
|
""" |


@dataclass
class TranscriptionConfig:
    # Required: path to a .nemo file or a pretrained model name from G2PModel.list_available_models()
    pretrained_model: str
    # Required: path to a .json manifest with input grapheme text
    manifest_filepath: str

    # Name of the manifest field with ground-truth phonemes; if provided, evaluation metrics are computed
    phoneme_field: Optional[str] = None
    # Name of the manifest field with input grapheme text
    grapheme_field: Optional[str] = "text_graphemes"

    # Path to an output .json manifest; defaults to the input manifest with a "_phonemes.json" suffix
    output_file: Optional[str] = None
    # Name of the field in the output manifest to store predictions in
    pred_field: Optional[str] = "pred_text"
    batch_size: int = 32  # batch size for inference
    num_workers: int = 0  # number of DataLoader workers for inference

    # Optional: path to a .nemo file or pretrained model name for a heteronyms model
    pretrained_heteronyms_model: Optional[str] = None


@hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    if is_dataclass(cfg):
        cfg = OmegaConf.structured(cfg)

    if not cfg.pretrained_model:
        raise ValueError(
            'To run this evaluation and inference script, a pre-trained model or .nemo file must be provided. '
            f'Choose from {G2PModel.list_available_models()} or "pretrained_model"="your_model.nemo"'
        )

    logging.info(
        'During evaluation/testing, it is currently advisable to construct a new Trainer with a single GPU '
        'and no DDP to obtain accurate results.'
    )

    # Run on a single GPU if available, otherwise on CPU
    if torch.cuda.is_available():
        device = [0]
        accelerator = 'gpu'
    else:
        device = 1
        accelerator = 'cpu'

    map_location = torch.device('cuda:{}'.format(device[0]) if accelerator == 'gpu' else 'cpu')
    trainer = pl.Trainer(devices=device, accelerator=accelerator, logger=False, enable_checkpointing=False)

    # Load the model from a local .nemo checkpoint or from the list of pretrained models
    if os.path.exists(cfg.pretrained_model):
        model = G2PModel.restore_from(cfg.pretrained_model, map_location=map_location)
    elif cfg.pretrained_model in G2PModel.get_available_model_names():
        model = G2PModel.from_pretrained(cfg.pretrained_model, map_location=map_location)
    else:
        raise ValueError(
            f'Provide path to the pre-trained .nemo checkpoint or choose from {G2PModel.list_available_models()}'
        )
    model._cfg.max_source_len = 512  # override the maximum source sequence length for inference
    model.set_trainer(trainer)
    model = model.eval()

    # Default output file: input manifest name with a "_phonemes.json" suffix
    if cfg.output_file is None:
        cfg.output_file = cfg.manifest_filepath.replace(".json", "_phonemes.json")

    with torch.no_grad():
        model.convert_graphemes_to_phonemes(
            manifest_filepath=cfg.manifest_filepath,
            output_manifest_filepath=cfg.output_file,
            grapheme_field=cfg.grapheme_field,
            batch_size=cfg.batch_size,
            num_workers=cfg.num_workers,
            pred_field=cfg.pred_field,
        )
        print(f"IPA predictions saved in {cfg.output_file}")

        # If ground-truth phonemes are available in the manifest, compute evaluation metrics
        if cfg.phoneme_field is not None:
            get_metrics(cfg.output_file, phoneme_field=cfg.phoneme_field, grapheme_field=cfg.grapheme_field)


if __name__ == '__main__':
    main()