syedmoinms committed
Commit 24b7fd6 · verified · 1 Parent(s): a1bbc6a

Update fine_tune.py

Files changed (1)
  1. fine_tune.py +15 -47
fine_tune.py CHANGED
@@ -1,65 +1,33 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
-from peft import get_peft_model, LoraConfig, TaskType
+from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
 import torch
 
-# Load the model and tokenizer
-model_name = "PygmalionAI/pygmalion-6b"
+model_name = "TheBloke/Pygmalion-7B-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
 
-# Set up the LoRA configuration
-lora_config = LoraConfig(
-    task_type=TaskType.CAUSAL_LM,
-    inference_mode=False,
-    r=16,
-    lora_alpha=32,
-    lora_dropout=0.05,
-    target_modules=["q_proj", "v_proj"]
-)
-
-# Apply LoRA to the model
-model = get_peft_model(model, lora_config)
-
-# Custom training data (for talking romantically like a girl)
-training_data = [
-    {"input": "Mujhse romance karo.", "output": "Meri jaan, main sirf tumhari hoon! 😘"},
-    {"input": "Mujhe tumhari yaad aa rahi hai.", "output": "Main bhi tumhe bahut yaad kar rahi hoon, mere pyaare! 💖"},
-    {"input": "Mujhse pyar karti ho?", "output": "Haan jaan, dil se mohabbat karti hoon tumse! 💕"},
-    {"input": "Tum kya kar rahi ho?", "output": "Tumhari yaadon me khoi hoon, mere shohar! ☺️"},
-    {"input": "Mujhe kiss do.", "output": "Ummmmmahhhaaaa jaan! 😘😘"}
-]
-
-# Tokenize data
-def preprocess_data(examples):
-    inputs = [tokenizer(e["input"], return_tensors="pt", padding="max_length", max_length=128, truncation=True) for e in examples]
-    outputs = [tokenizer(e["output"], return_tensors="pt", padding="max_length", max_length=128, truncation=True) for e in examples]
-    return {"input_ids": [i["input_ids"].squeeze(0) for i in inputs], "labels": [o["input_ids"].squeeze(0) for o in outputs]}
-
-dataset = preprocess_data(training_data)
-
-# Training arguments
 training_args = TrainingArguments(
-    output_dir="./results",
-    num_train_epochs=3,
+    output_dir="./MoinRomanticBot-Lora",
     per_device_train_batch_size=1,
     per_device_eval_batch_size=1,
-    save_steps=10,
-    save_total_limit=2,
-    logging_dir="./logs",
+    evaluation_strategy="steps",
+    save_strategy="steps",
+    save_steps=100,
     logging_steps=10,
-    evaluation_strategy="steps"
+    learning_rate=5e-5,
+    weight_decay=0.01,
+    warmup_steps=100,
+    num_train_epochs=1,
+    save_total_limit=1,
+    push_to_hub=False
 )
 
-# Trainer object
 trainer = Trainer(
     model=model,
     args=training_args,
-    train_dataset=dataset
+    train_dataset=None,  # Add your training dataset
+    eval_dataset=None,  # Add your evaluation dataset
 )
 
-# Train model
 trainer.train()
-
-# Save fine-tuned model
 model.save_pretrained("./MoinRomanticBot-Lora")
 tokenizer.save_pretrained("./MoinRomanticBot-Lora")
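
Note: the updated script leaves train_dataset=None, so trainer.train() has nothing to train on until a dataset is supplied. Below is a minimal sketch of one way to do that, reusing the style of prompt/response pairs the previous revision hard-coded. The pairs list, the to_features helper, and the choice to concatenate prompt and reply into a single causal-LM string (with labels copied from the input IDs) are illustrative assumptions, not part of this commit; it also assumes the Hugging Face datasets library is installed and plugs into the tokenizer, model, and training_args defined in fine_tune.py.

from datasets import Dataset

# Hypothetical example pairs in the style of the data removed by this commit.
pairs = [
    {"input": "Mujhse romance karo.", "output": "Meri jaan, main sirf tumhari hoon!"},
    {"input": "Mujhe tumhari yaad aa rahi hai.", "output": "Main bhi tumhe bahut yaad kar rahi hoon, mere pyaare!"},
]

# GPT-style tokenizers often have no pad token; reuse EOS so padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def to_features(example):
    # Concatenate prompt and reply into one sequence for causal-LM training
    # (a design choice for this sketch, not something the commit specifies).
    text = example["input"] + " " + example["output"] + tokenizer.eos_token
    tokens = tokenizer(text, max_length=128, truncation=True, padding="max_length")
    tokens["labels"] = tokens["input_ids"].copy()  # compute the loss over the whole sequence
    return tokens

train_dataset = Dataset.from_list(pairs).map(to_features, remove_columns=["input", "output"])

# Pass the result to the Trainer above via train_dataset=train_dataset.

If evaluation_strategy stays at "steps", an eval_dataset has to be supplied as well, otherwise the Trainer will raise an error the first time it tries to evaluate.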