Spaces:
Sleeping
Sleeping
File size: 1,534 Bytes
6fc683c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
#!/usr/bin/env bash
set -x
set -e
DIR="$( cd "$( dirname "$0" )" && cd .. && pwd )"
echo "working directory: ${DIR}"
if [ -z "$OUTPUT_DIR" ]; then
OUTPUT_DIR="${DIR}/checkpoint/rlm_$(date +%F-%H%M.%S)"
fi
if [ -z "$DATA_DIR" ]; then
DATA_DIR="${DIR}/data/msmarco_bm25_official/"
fi
mkdir -p "${OUTPUT_DIR}"
PROC_PER_NODE=$(nvidia-smi --list-gpus | wc -l)
# python -u -m torch.distributed.launch --nproc_per_node ${PROC_PER_NODE} src/train_rlm.py \
deepspeed src/train_rlm.py --deepspeed ds_config.json \
--model_name_or_path bert-base-uncased \
--per_device_train_batch_size 64 \
--per_device_eval_batch_size 64 \
--gradient_accumulation_steps 4 \
--seed 45678 \
--do_train \
--do_eval \
--fp16 \
--train_file "${DATA_DIR}/passages.jsonl.gz" \
--rlm_max_length 144 \
--rlm_encoder_mask_prob 0.3 \
--rlm_decoder_mask_prob 0.5 \
--rlm_generator_model_name google/electra-base-generator \
--rlm_freeze_generator True \
--rlm_generator_mlm_weight 0.2 \
--all_use_mask_token True \
--dataloader_num_workers 1 \
--max_steps 80000 \
--learning_rate 3e-4 \
--warmup_steps 4000 \
--weight_decay 0.0 \
--remove_unused_columns False \
--logging_steps 50 \
--report_to none \
--output_dir "${OUTPUT_DIR}" \
--save_total_limit 20 \
--save_strategy steps \
--save_steps 10000 \
--evaluation_strategy steps \
--eval_steps 10000 \
--data_dir "${DATA_DIR}" \
--overwrite_output_dir \
--disable_tqdm True "$@"
|