vv876803 committed
Commit e985b51 · verified · 1 Parent(s): b420e25

Update train.py

Files changed (1)
  1. train.py +62 -0
train.py CHANGED
@@ -0,0 +1,62 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
+from datasets import load_dataset
+from peft import LoraConfig, get_peft_model
+
+# Model & Tokenizer
+MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Adjust if using your own model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers ship without a pad token
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
+
+# Apply LoRA for Efficient Fine-Tuning
+peft_config = LoraConfig(
+    r=8,  # Low-rank adaptation size
+    lora_alpha=16,
+    lora_dropout=0.05,
+    bias="none",
+    task_type="CAUSAL_LM"
+)
+model = get_peft_model(model, peft_config)
+
+# Load Dataset (OASST1)
+dataset = load_dataset("OpenAssistant/oasst1", split="train[:10%]")  # Use 10% of the dataset
+
+# Tokenization Function
+def tokenize_function(examples):
+    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
+
+# Tokenize Dataset (dropping the raw columns) and split off a small evaluation set
+tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)
+split_dataset = tokenized_dataset.train_test_split(test_size=0.1)
+
+# Data collator builds the labels for the causal-LM loss from input_ids
+data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+
+# Training Arguments
+training_args = TrainingArguments(
+    output_dir="./tinyllama-finetuned",
+    evaluation_strategy="epoch",
+    save_strategy="epoch",
+    per_device_train_batch_size=2,  # Adjust for CPU
+    per_device_eval_batch_size=2,
+    num_train_epochs=3,
+    logging_dir="./logs",
+    report_to="none"
+)
+
+# Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=split_dataset["train"],
+    eval_dataset=split_dataset["test"],
+    data_collator=data_collator,
+)
+
+# Start Training
+trainer.train()
+
+# Save Fine-Tuned Model (LoRA adapter) and Tokenizer
+model.save_pretrained("./tinyllama-finetuned")
+tokenizer.save_pretrained("./tinyllama-finetuned")
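
For reference, a minimal sketch (not part of this commit) of how the saved LoRA adapter could be loaded back for a quick generation check, assuming the same base model and the ./tinyllama-finetuned output path used above; the prompt is arbitrary:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model, then attach the fine-tuned LoRA adapter saved by train.py
base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16
)
model = PeftModel.from_pretrained(base_model, "./tinyllama-finetuned")
tokenizer = AutoTokenizer.from_pretrained("./tinyllama-finetuned")

# Generate a short completion as a smoke test
inputs = tokenizer("Hello, how can I help you today?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))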