mtasic85 committed on
Commit
9105382
·
1 Parent(s): 323d75d

pretrain core 0

Browse files
Files changed (2) hide show
  1. README.md +6 -1
  2. scripts/pretrain-core-model-0.yaml +2 -2
README.md CHANGED
@@ -53,7 +53,12 @@ time python -B prepare_core_datasets.py
53
  ```
54
 
55
  ```
56
- # ...
 
 
 
 
 
57
  ```
58
 
59
  ```bash
 
53
  ```
54
 
55
  ```
56
+ i=0, min_len=0, max_len=1048576, block_size=2049, chunk_size=16392000, len(dataset)=3134311, len(dataset) * block_size=6422203239
57
+ Total number of tokens in the optimized dataset '../core-data-0-0-1048576-2049-8000' is 6422203239
58
+ i=1, min_len=2049, max_len=8193, block_size=8193, chunk_size=16386000, len(dataset)=179944, len(dataset) * block_size=1474281192
59
+ Total number of tokens in the optimized dataset '../core-data-1-2049-8193-8193-2000' is 1474281192
60
+ i=2, min_len=8193, max_len=1048577, block_size=32769, chunk_size=16384500, len(dataset)=48261, len(dataset) * block_size=1581464709
61
+ Total number of tokens in the optimized dataset '../core-data-2-8193-1048577-32769-500' is 1581464709
62
  ```
63
 
64
  ```bash
scripts/pretrain-core-model-0.yaml CHANGED
@@ -46,7 +46,7 @@ data:
46
  class_path: LitData
47
 
48
  init_args:
49
- data_path: "../core-data-0-8192-2000/"
50
  num_workers: 32
51
 
52
  # Training-related arguments. See ``litgpt.args.TrainArgs`` for details
@@ -74,7 +74,7 @@ train:
74
  epochs:
75
 
76
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
77
- max_tokens: ???
78
 
79
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
80
  max_steps:
 
46
  class_path: LitData
47
 
48
  init_args:
49
+ data_path: "../core-data-0-0-1048576-2049-8000/"
50
  num_workers: 32
51
 
52
  # Training-related arguments. See ``litgpt.args.TrainArgs`` for details
 
74
  epochs:
75
 
76
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
77
+ max_tokens: 6422203239
78
 
79
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
80
  max_steps: