Ruurd committed on
Commit 620a6cd · verified · 1 Parent(s): e237f80

Change LoRA size from 256 to 512, also back to bidirectional_masked

Files changed (1):
  llama_diffusion_model.py  +3 -3
llama_diffusion_model.py CHANGED
@@ -192,7 +192,7 @@ class CustomTransformerModel(PreTrainedModel):
         self.llama.resize_token_embeddings(config.vocab_size)
 
         for i, layer in enumerate(self.llama.model.layers):
-            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='bidirectional')
+            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='bidirectional_masked')
 
         # Freeze Llama to retain pre-trained knowledge
         for param in self.llama.parameters():
@@ -202,8 +202,8 @@ class CustomTransformerModel(PreTrainedModel):
             param.requires_grad = True
 
         lora_config = LoraConfig(
-            r=256,
-            lora_alpha=256,
+            r=512,
+            lora_alpha=512,
             lora_dropout=0.0,
             target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],  # Llama-3 uses these attention modules
             bias="none",