Ruurd committed
Commit e237f80 · verified · 1 Parent(s): 0daaccf

Changed back to bidirectional attention

Files changed (1)
  1. llama_diffusion_model.py +1 -1
llama_diffusion_model.py CHANGED
@@ -192,7 +192,7 @@ class CustomTransformerModel(PreTrainedModel):
         self.llama.resize_token_embeddings(config.vocab_size)
 
         for i, layer in enumerate(self.llama.model.layers):
-            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='bidirectional_masked')
+            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='bidirectional')
 
         # Freeze Llama to retain pre-trained knowledge
         for param in self.llama.parameters():
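
The change swaps the masking mode passed to BidirectionalLlamaAttention from 'bidirectional_masked' back to 'bidirectional', i.e. letting every position attend to every other position instead of a partially masked variant. The exact semantics of the two modes live inside BidirectionalLlamaAttention and are not shown in this diff; as a rough, hypothetical illustration (build_attention_mask is not a function from this repository), a fully bidirectional mask differs from Llama's default causal mask roughly as follows:

import torch

def build_attention_mask(seq_len: int, masking: str = 'bidirectional') -> torch.Tensor:
    # True marks positions a query token is allowed to attend to.
    if masking == 'causal':
        # Standard autoregressive Llama behaviour: token i sees positions <= i.
        return torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))
    if masking == 'bidirectional':
        # Full attention: every token sees every other token (encoder-style),
        # which suits a diffusion-style model that denoises whole sequences.
        return torch.ones(seq_len, seq_len, dtype=torch.bool)
    raise ValueError(f"unsupported masking mode: {masking!r}")

# Example: compare the two masks for a 4-token sequence.
print(build_attention_mask(4, 'causal').int())
print(build_attention_mask(4, 'bidirectional').int())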