Mariam-Elz committed on
Commit
5680571
·
verified ·
1 Parent(s): 909930f

Upload launch_train.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. launch_train.sh +41 -0
launch_train.sh ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# launch_train.sh - start training through `accelerate launch`.
#
# Requires bash: the launcher arguments are kept in an array so that each
# value reaches `accelerate` as exactly one argument, with no word-splitting
# or glob expansion.
#
# Environment variables (each falls back to the default shown when it is
# unset or empty):
#   ADDR         passed as --main_process_ip    (default: 127.0.0.1)
#   WORLD_SIZE   passed as --num_machines       (default: 1)
#   RANK         passed as --machine_rank       (default: 0)
#   MASTER_PORT  passed as --main_process_port  (default: 29501)
#
# OMP_NUM_THREADS=8 and WANDB_MODE=offline are always exported, replacing
# any value inherited from the caller.
#
# Exit status: that of the final `accelerate launch` command.

# set default values for the environment variables
export OMP_NUM_THREADS=8
export ADDR="${ADDR:-127.0.0.1}"
export WORLD_SIZE="${WORLD_SIZE:-1}"
export RANK="${RANK:-0}"
export MASTER_PORT="${MASTER_PORT:-29501}"

export WANDB_MODE=offline

# Arguments shared by every `accelerate launch` invocation in this script.
accelerate_args=(
  --config_file acce.yaml
  --num_machines "$WORLD_SIZE"
  --machine_rank "$RANK"
  --num_processes 1
  --main_process_port "$MASTER_PORT"
  --main_process_ip "$ADDR"
)

# Log the launcher arguments on a single line, separated by spaces.
printf '%s\n' "${accelerate_args[*]}"

# train stage 1
accelerate launch "${accelerate_args[@]}" train.py \
  --config configs/nf7_v3_SNR_rd_size_stroke_train.yaml \
  config.batch_size=1 \
  config.eval_interval=100

# train stage 2
# accelerate launch "${accelerate_args[@]}" train_stage2.py --config configs/stage2-v2-snr_train.yaml \
#   config.batch_size=1 \
#   config.eval_interval=100