import random
import torch
import logging
import multiprocessing
import numpy as np

logger = logging.getLogger(__name__)


def add_args(parser):
    parser.add_argument(
        "--task",
        type=str,
        required=False,
        choices=[
            "review",
        ],
    )
    parser.add_argument(
        "--model_type",
        default="codet5",
        type=str,
        choices=["roberta", "t5", "bart", "codet5", "scratch"],
    )
    parser.add_argument("--add_lang_ids", action="store_true")
    parser.add_argument("--from_scratch", action="store_true")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--start_epoch", default=0, type=int)
    parser.add_argument("--train_epochs", default=10, type=int)
    parser.add_argument("--tokenizer_path", type=str, required=False)
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=False,
        help="The output directory where the model predictions and checkpoints will be written.",
    )
    parser.add_argument("--load_model_path", default=None, type=str, required=False)
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        help="Path to a trained model: should contain the .bin files.",
    )
    ## Other parameters
    parser.add_argument(
        "--train_path",
        default=None,
        type=str,
        help="The pretrain files path. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--eval_chunkname",
        default=None,
        type=str,
        help="The eval file name.",
    )
    parser.add_argument(
        "--train_filename",
        default=None,
        type=str,
        help="The train filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--dev_filename",
        default=None,
        type=str,
        help="The dev filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--test_filename",
        default=None,
        type=str,
        help="The test filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--gold_filename",
        default=None,
        type=str,
        help="The gold filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--config_name",
        default="Salesforce/codet5-base",
        type=str,
        help="Pretrained config name or path if not the same as model_name.",
    )
    parser.add_argument(
        "--max_source_length",
        default=64,
        type=int,
        help="The maximum total source sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument(
        "--max_target_length",
        default=32,
        type=int,
        help="The maximum total target sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument(
        "--do_train", action="store_true", help="Whether to run training on the train set."
    )
    parser.add_argument(
        "--do_eval", action="store_true", help="Whether to run eval on the dev set."
    )
    parser.add_argument(
        "--do_test", action="store_true", help="Whether to run eval on the test set."
    )
    parser.add_argument(
        "--raw_input",
        action="store_true",
        help="Whether to use the simple input format (set for baselines).",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.",
    )
    parser.add_argument(
        "--no_cuda", action="store_true", help="Avoid using CUDA even when available."
    )
    parser.add_argument(
        "--train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of update steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--learning_rate",
        default=5e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument(
        "--mask_rate",
        default=0.15,
        type=float,
        help="The mask rate of input lines.",
    )
    parser.add_argument(
        "--beam_size", default=6, type=int, help="Beam size for beam search."
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay, if we apply some."
    )
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for the Adam optimizer."
    )
    parser.add_argument(
        "--max_grad_norm", default=1.0, type=float, help="Max gradient norm."
    )
    parser.add_argument("--save_steps", default=-1, type=int)
    parser.add_argument("--log_steps", default=-1, type=int)
    parser.add_argument("--eval_steps", default=-1, type=int)
    parser.add_argument("--eval_file", default="", type=str)
    parser.add_argument("--out_file", default="", type=str)
    parser.add_argument("--break_cnt", default=-1, type=int)
    parser.add_argument("--train_steps", default=-1, type=int)
    parser.add_argument(
        "--warmup_steps", default=100, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument(
        "--gpu_per_node",
        type=int,
        default=4,
        help="Number of GPUs per node.",
    )
    parser.add_argument(
        "--node_index",
        type=int,
        default=0,
        help="For distributed training: node_index.",
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="For distributed training: local_rank.",
    )
    parser.add_argument(
        "--seed", type=int, default=2233, help="Random seed for initialization."
    )  # previous one was 42
    parser.add_argument(
        "--clearml_train_dataset_id",
        type=str,
        default=None,
        help="ClearML Dataset ID to fetch training data from. Overrides train_filename if provided.",
    )
    parser.add_argument(
        "--clearml_valid_dataset_id",
        type=str,
        default=None,
        help="ClearML Dataset ID to fetch validation data from. Overrides dev_filename if provided.",
    )
    args = parser.parse_args()
    return args


def set_dist(args):
    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        # Single-node (possibly multi-GPU) or CPU-only execution
        device = torch.device(
            "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        )
        args.n_gpu = torch.cuda.device_count()
    else:
        # Setup for distributed data parallel: one process per GPU
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    cpu_count = multiprocessing.cpu_count()
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, cpu count: %d",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        cpu_count,
    )
    args.device = device
    args.cpu_count = cpu_count


def set_seed(args):
    """Set random seeds for reproducibility."""
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)
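

if __name__ == "__main__":
    # Minimal usage sketch (illustrative addition, not part of the original
    # training pipeline): build a parser, parse the CLI flags defined above,
    # then initialize the device/distributed state and the random seeds.
    import argparse

    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    args = add_args(parser)  # parses sys.argv and returns the Namespace
    set_dist(args)  # sets args.device, args.n_gpu, args.cpu_count
    set_seed(args)  # seeds random, numpy, and torch from args.seed
    logger.info("Initialized with device=%s, n_gpu=%d", args.device, args.n_gpu)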