# Source: NeMo/examples/nlp/language_modeling/conf/megatron_retro_inference.yaml
# Thanks to NVIDIA ❤
# Snapshot metadata: 7934b29, about 2 years ago, 2.01 kB

	inference:
	greedy: False # Whether or not to use sampling ; use greedy decoding otherwise
	top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering.
	top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
	temperature: 1.0 # sampling temperature
	add_BOS: True # add the bos token at the begining of the prompt
	tokens_to_generate: 30 # The minimum length of the sequence to be generated.
	all_probs: False # whether return the log prob for all the tokens in vocab
	repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty.
	min_tokens_to_generate: 0 # The minimum length of the sequence to be generated.
	compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False


	trainer:
	devices: 1
	num_nodes: 1
	accelerator: gpu
	logger: False # logger provided by exp_manager
	precision: 16 # 16, 32, or bf16

	inference_batch_size: 2
	tensor_model_parallel_size: 1
	pipeline_model_parallel_size: 1
	pipeline_model_parallel_split_rank: 0 # used for encoder and decoder model
	retro_model_file: null # RETRO nemo file path

	use_predict_method: False # whether to use the predict method

	prompts: # prompts for RETRO model inference
	- "hello,"
	- "good morning,"
	- "good afternoon,"
	- "good evening,"

	########### Faiss service parameters ########
	retrieval_service:
	strategy: RetroModelTextGenerationStrategy # choose customized inference strategy
	neighbors: 4
	frequent_query: False # for the current token generation, frequently update the retrieval context. If false, update it every 64 tokens
	pad_tokens: True # pad the tokens at the beginning to make it minimum of 64 tokens for retrieving at least once
	store_retrieved: False # whether store the retrieved documents, so it can be checked
	combo_service:
	service_ip: '0.0.0.0'
	service_port: 17181