{
  "gpu_mode": "T",
  "gpu_index": 0,
  "gpu_indices": [0, 1],
  "multigpu": "F",
  "topk_size": 10,
  "beam_size": 1,
  "gen_seqlength": 40,
  "eval_sampler": "greedy",
  "num_sequences": 1,
  "generate_sequences": 1000,
  "evaluate_sequences": 10000,
  "random_seed": 123,
  "optimizer": "adam",
  "batch_size": 64,
  "learning_rate": 6.25e-5,
  "clip": 1,
  "loss": "nll",
  "weight_decay": 0,
  "adam": {
    "b2": 0.999,
    "b1": 0.9,
    "e": 1e-8
  },
  "model": "transformer",
  "pretrain": "gpt",
  "hidden_dim": 768,
  "num_layers": 12,
  "num_heads": 12,
  "embedding_dropout": 0.1,
  "attention_dropout": 0.1,
  "residual_dropout": 0.1,
  "output_dropout": 0.1,
  "activation": "gelu",
  "init": "pt",
  "trainer": "iteration",
  "iterations": 50000,
  "cycle": 500,
  "save_strategy": "best",
  "epochs": 20,
  "toy": "F",
  "do_gen": "F",
  "save": "T",
  "test_save": "F"
}
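
For reference, a minimal sketch of how a config like this might be consumed in Python. The filename config.json and the attribute-style access are assumptions, not part of the file itself; note that this config encodes booleans as "T"/"F" strings rather than JSON true/false, so they must be decoded explicitly.

import json
from types import SimpleNamespace

# Parse the config into attribute-style objects; nested dicts such as
# "adam" become namespaces too. "config.json" is an assumed filename.
with open("config.json") as f:
    cfg = json.load(f, object_hook=lambda d: SimpleNamespace(**d))

# Decode the "T"/"F" string flags used throughout this config.
use_gpu = cfg.gpu_mode == "T"
multigpu = cfg.multigpu == "T"

# The hyperparameters describe a GPT-style transformer setup:
# 12 layers, 12 heads, hidden size 768, Adam at lr 6.25e-5.
print(cfg.model, cfg.hidden_dim, cfg.num_layers, cfg.num_heads)
print(cfg.optimizer, cfg.learning_rate, cfg.adam.b1, cfg.adam.b2, cfg.adam.e)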