{
"gpu_mode": "T",
"gpu_index": 0,
"gpu_indices": [
0,
1
],
"multigpu": "F",
"topk_size": 10,
"beam_size": 1,
"gen_seqlength": 40,
"eval_sampler": "greedy",
"num_sequences": 1,
"generate_sequences": "full",
"evaluate_sequences": "full",
"random_seed": 123,
"optimizer": "adam",
"batch_size": 64,
"learning_rate": 1e-05,
"clip": 1,
"loss": "nll",
"weight_decay": 0,
"adam": {
"b2": 0.999,
"b1": 0.9,
"e": 1e-08
},
"model": "transformer",
"pretrain": "gpt",
"hidden_dim": 768,
"num_layers": 12,
"num_heads": 12,
"embedding_dropout": 0.1,
"attention_dropout": 0.1,
"residual_dropout": 0.1,
"output_dropout": 0.1,
"activation": "gelu",
"init": "pt",
"trainer": "iteration",
"iterations": 100000,
"cycle": 500,
"save_strategy": "best",
"epochs": 20,
"toy": "F",
"do_gen": "T",
"save": "T",
"test_save": "F",
"dataset": "conceptnet",
"exp": "generation",
"encoder_path": "model/encoder_bpe_40000.json",
"bpe_path": "model/vocab_40000.bpe",
"learning_rate_schedule": "warmup_linear",
"learning_rate_warmup": 0.002,
"l2": 0.01,
"vector_l2": "T",
"relation_format": "language",
"training_set_size": 100,
"development_set_versions_to_use": "12",
"max_event_1_size": 10,
"max_event_2_size": 15
}