mjschock committed
Commit 8ca2c5b · unverified · 1 Parent(s): 04d059b

Add DataCollatorForLanguageModeling to trainer configuration in train.py for improved data handling during training.

Files changed (1)
  1. train.py +8 -2
train.py CHANGED
@@ -34,6 +34,7 @@ from datasets import (
 )
 from transformers import AutoTokenizer, Trainer, TrainingArguments
 from trl import SFTTrainer
+from trl.data.data_collator import DataCollatorForLanguageModeling

 # Configuration
 max_seq_length = 2048  # Auto supports RoPE Scaling internally
@@ -211,8 +212,8 @@ def create_trainer(
             logging_steps=1,
             save_strategy="steps",
             save_steps=30,
-            eval_strategy="steps",  # Match save_strategy
-            eval_steps=30,  # Match save_steps
+            eval_strategy="steps",
+            eval_steps=30,
             save_total_limit=2,
             optim="adamw_8bit",
             weight_decay=0.01,
@@ -224,6 +225,11 @@ def create_trainer(
             metric_for_best_model="eval_loss",
             greater_is_better=False,
         ),
+        data_collator=DataCollatorForLanguageModeling(
+            tokenizer=tokenizer,
+            mlm=False,
+            pad_to_multiple_of=8,
+        ),
     )
     logger.info("Trainer created successfully")
     return trainer
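
For context: transformers ships a DataCollatorForLanguageModeling that takes exactly these keyword arguments (tokenizer, mlm, pad_to_multiple_of); whether the trl.data.data_collator path used above resolves depends on the trl version pinned by this repo. A minimal sketch of what the collator does for causal-LM batches, assuming the transformers implementation and a placeholder gpt2 tokenizer (neither comes from train.py):

# Sketch only: assumes transformers' DataCollatorForLanguageModeling;
# "gpt2" is a placeholder checkpoint, not taken from train.py.
from transformers import AutoTokenizer, DataCollatorForLanguageModeling

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 defines no pad token by default

collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,             # causal LM: labels are a copy of input_ids (the model shifts them internally)
    pad_to_multiple_of=8,  # pad each batch's sequence length up to a multiple of 8
)

features = [tokenizer("short"), tokenizer("a somewhat longer example sentence")]
batch = collator(features)
print(batch["input_ids"].shape)  # sequence dimension padded to a multiple of 8
print(batch["labels"][0])        # pad positions set to -100 so the loss ignores them

mlm=False is what makes this a causal-LM collator rather than a BERT-style masking one, and pad_to_multiple_of=8 mainly helps on GPUs with tensor cores, where matmul dimensions divisible by 8 hit the fast path.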