# MoinRomanticbot/fine_tune.py
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import get_peft_model, prepare_model_for_kbit_training, LoraConfig, TaskType
import torch
# Load the model and tokenizer
model_name = "PygmalionAI/pygmalion-6b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # GPT-J-style tokenizers ship without a pad token
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
model = prepare_model_for_kbit_training(model)  # required before LoRA training on an 8-bit model
# Set up the LoRA configuration
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
inference_mode=False,
r=16,
lora_alpha=32,
lora_dropout=0.05,
target_modules=["q_proj", "v_proj"]
)
# Apply LoRA to the model
model = get_peft_model(model, lora_config)
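# Quick sanity check: PEFT's print_trainable_parameters() reports how few
# weights LoRA actually trains relative to the frozen 6B base model.
model.print_trainable_parameters()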
# Custom training data (romantic replies in a girlfriend persona; pairs are romanized Hindi)
training_data = [
{"input": "Mujhse romance karo.", "output": "Meri jaan, main sirf tumhari hoon! 😘"},
{"input": "Mujhe tumhari yaad aa rahi hai.", "output": "Main bhi tumhe bahut yaad kar rahi hoon, mere pyaare! πŸ’–"},
{"input": "Mujhse pyar karti ho?", "output": "Haan jaan, dil se mohabbat karti hoon tumse! πŸ’•"},
{"input": "Tum kya kar rahi ho?", "output": "Tumhari yaadon me khoi hoon, mere shohar! ☺️"},
{"input": "Mujhe kiss do.", "output": "Ummmmmahhhaaaa jaan! 😘😘"}
]
# Tokenize data: for causal LM fine-tuning, each example is the prompt and the
# reply concatenated into one sequence, with the token ids reused as labels.
def preprocess_data(examples):
    texts = [e["input"] + tokenizer.eos_token + e["output"] + tokenizer.eos_token for e in examples]
    enc = tokenizer(texts, padding="max_length", max_length=128, truncation=True, return_tensors="pt")
    enc["labels"] = enc["input_ids"].clone()
    enc["labels"][enc["attention_mask"] == 0] = -100  # ignore padding positions in the loss
    # Return a list of per-example dicts, which Trainer can index and collate
    return [{k: v[i] for k, v in enc.items()} for i in range(len(texts))]

dataset = preprocess_data(training_data)
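# Optional sanity check: decoding the first example should show the prompt,
# an EOS separator, the reply, then padding tokens.
print(tokenizer.decode(dataset[0]["input_ids"], skip_special_tokens=False)[:200])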
# Training arguments (there is no eval dataset, so evaluation stays disabled)
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10
)
# Trainer object
trainer = Trainer(
model=model,
args=training_args,
train_dataset=dataset
)
# Train model
trainer.train()
# Save the fine-tuned adapter and tokenizer (a PeftModel saves only the LoRA weights)
model.save_pretrained("./MoinRomanticBot-Lora")
tokenizer.save_pretrained("./MoinRomanticBot-Lora")
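# A minimal inference sketch, assuming the adapter saved above: reload the 8-bit
# base model, attach the LoRA weights with PeftModel, and generate one reply.
# The prompt string and sampling settings here are illustrative, not prescribed.
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
chat_model = PeftModel.from_pretrained(base, "./MoinRomanticBot-Lora")
chat_model.eval()

prompt = "Mujhse romance karo."
inputs = tokenizer(prompt, return_tensors="pt").to(chat_model.device)
with torch.no_grad():
    output_ids = chat_model.generate(**inputs, max_new_tokens=60, do_sample=True, top_p=0.9)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))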