# NOTE(review): removed non-code extraction artifacts ("Spaces:" / "Running" header lines).
import logging
import multiprocessing
import random

import numpy as np
import torch

logger = logging.getLogger(__name__)
def add_args(parser):
    """Register all command-line options on *parser*, parse them, and return the args.

    Note: this both registers the options and calls ``parser.parse_args()``,
    so it reads ``sys.argv`` as a side effect.

    Args:
        parser: an ``argparse.ArgumentParser`` to populate.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser.add_argument(
        "--task",
        type=str,
        required=False,
        choices=[
            "review",
        ],
    )
    parser.add_argument(
        "--model_type",
        default="codet5",
        type=str,
        choices=["roberta", "t5", "bart", "codet5", "scratch"],
    )
    parser.add_argument("--add_lang_ids", action="store_true")
    parser.add_argument("--from_scratch", action="store_true")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--start_epoch", default=0, type=int)
    parser.add_argument("--train_epochs", default=10, type=int)
    parser.add_argument("--tokenizer_path", type=str, required=False)
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=False,
        help="The output directory where the model predictions and checkpoints will be written.",
    )
    parser.add_argument(
        "--load_model_path",
        default=None,
        type=str,
        required=False
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        help="Path to trained model: Should contain the .bin files",
    )
    ## Other parameters
    parser.add_argument(
        "--train_path",
        default=None,
        type=str,
        help="The pretrain files path. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--eval_chunkname",
        default=None,
        type=str,
        help="The eval file name.",
    )
    parser.add_argument(
        "--train_filename",
        default=None,
        type=str,
        help="The train filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--dev_filename",
        default=None,
        type=str,
        help="The dev filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--test_filename",
        default=None,
        type=str,
        help="The test filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--gold_filename",
        default=None,
        type=str,
        help="The gold filename. Should contain the .jsonl files for this task.",
    )
    parser.add_argument(
        "--config_name",
        default="Salesforce/codet5-base",
        type=str,
        help="Pretrained config name or path if not the same as model_name",
    )
    parser.add_argument(
        "--max_source_length",
        default=64,
        type=int,
        help="The maximum total source sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument(
        "--max_target_length",
        default=32,
        type=int,
        help="The maximum total target sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    # FIX(review): help text previously said "run eval on the train set" (copy-paste error).
    parser.add_argument(
        "--do_train", action="store_true", help="Whether to run training."
    )
    parser.add_argument(
        "--do_eval", action="store_true", help="Whether to run eval on the dev set."
    )
    # FIX(review): help text previously said "dev set" (copy-paste error).
    parser.add_argument(
        "--do_test", action="store_true", help="Whether to run eval on the test set."
    )
    parser.add_argument(
        "--raw_input", action="store_true", help="Whether to use simple input format (set for baselines)."
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.",
    )
    parser.add_argument(
        "--no_cuda", action="store_true", help="Avoid using CUDA when available"
    )
    parser.add_argument(
        "--train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--learning_rate",
        default=5e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument(
        "--mask_rate", default=0.15, type=float, help="The masked rate of input lines.",
    )
    parser.add_argument(
        "--beam_size", default=6, type=int, help="beam size for beam search"
    )
    # FIX(review): typo "Weight deay" corrected.
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay if we apply some."
    )
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer."
    )
    parser.add_argument(
        "--max_grad_norm", default=1.0, type=float, help="Max gradient norm."
    )
    parser.add_argument(
        "--save_steps", default=-1, type=int,
    )
    parser.add_argument(
        "--log_steps", default=-1, type=int,
    )
    parser.add_argument("--eval_steps", default=-1, type=int, help="")
    parser.add_argument("--eval_file", default="", type=str)
    parser.add_argument("--out_file", default="", type=str)
    parser.add_argument("--break_cnt", default=-1, type=int)
    parser.add_argument("--train_steps", default=-1, type=int, help="")
    parser.add_argument(
        "--warmup_steps", default=100, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument(
        "--gpu_per_node",
        type=int,
        default=4,
        help="gpus per node",
    )
    parser.add_argument(
        "--node_index",
        type=int,
        default=0,
        help="For distributed training: node_index",
    )
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="For distributed training: local_rank",
    )
    parser.add_argument(
        "--seed", type=int, default=2233, help="random seed for initialization"
    )  # previous one 42
    parser.add_argument(
        "--clearml_train_dataset_id",
        type=str,
        default=None,
        help="ClearML Dataset ID to fetch training data from. Overrides train_filename if provided.",
    )
    parser.add_argument(
        "--clearml_valid_dataset_id",
        type=str,
        default=None,
        help="ClearML Dataset ID to fetch validation data from. Overrides dev_filename if provided.",
    )
    args = parser.parse_args()
    return args
def set_dist(args):
    """Select the compute device and record distributed-training state on *args*.

    Sets ``args.device``, ``args.n_gpu``, and ``args.cpu_count``. When
    ``args.local_rank`` is not -1 (and CUDA is allowed), initializes the
    NCCL process group for distributed data parallel training.

    Args:
        args: Namespace with ``local_rank`` and ``no_cuda`` attributes;
            mutated in place.
    """
    if args.no_cuda or args.local_rank == -1:
        # Single-process mode: pick CUDA if present and permitted, else CPU.
        use_cuda = torch.cuda.is_available() and not args.no_cuda
        device = torch.device("cuda" if use_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        # Distributed data parallel: one process per GPU, pinned by local_rank.
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    cpu_count = multiprocessing.cpu_count()
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, cpu count: %d",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        cpu_count,
    )
    args.device = device
    args.cpu_count = cpu_count
def set_seed(args):
    """Seed all relevant RNGs (random, NumPy, torch CPU and CUDA) for reproducibility.

    Args:
        args: Namespace with an integer ``seed`` attribute.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Unconditional on purpose: torch queues CUDA seeding lazily, so this is
    # a safe no-op on machines without GPUs (dead "if args.n_gpu > 0" guard removed).
    torch.cuda.manual_seed_all(args.seed)