Manireddy1508 committed on
Commit
96149a8
·
verified ·
1 Parent(s): a740776

Upload 2 files

Browse files
config/deepspeed/zero2_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bf16": {
3
+ "enabled": "auto"
4
+ },
5
+ "zero_optimization": {
6
+ "stage": 2,
7
+ "offload_optimizer": {
8
+ "device": "none"
9
+ },
10
+ "overlap_comm": true,
11
+ "contiguous_gradients": true
12
+ },
13
+ "gradient_accumulation_steps": "auto",
14
+ "train_micro_batch_size_per_gpu": 1
15
+ }
config/deepspeed/zero3_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bf16": {
3
+ "enabled": "auto"
4
+ },
5
+
6
+ "zero_optimization": {
7
+ "stage": 3,
8
+ "offload_optimizer": {
9
+ "device": "cpu",
10
+ "pin_memory": true
11
+ },
12
+ "offload_param": {
13
+ "device": "cpu",
14
+ "pin_memory": true
15
+ },
16
+ "overlap_comm": true,
17
+ "contiguous_gradients": true,
18
+ "reduce_bucket_size": 16777216,
19
+ "stage3_prefetch_bucket_size": 15099494,
20
+ "stage3_param_persistence_threshold": 40960,
21
+ "sub_group_size": 1000000000,
22
+ "stage3_max_live_parameters": 1000000000,
23
+ "stage3_max_reuse_distance": 1000000000,
24
+ "stage3_gather_16bit_weights_on_model_save": true
25
+ },
26
+ "gradient_accumulation_steps": "auto",
27
+ "train_micro_batch_size_per_gpu": 1
28
+ }