Changed back to bidirectional attention
llama_diffusion_model.py  CHANGED  +1 -1
@@ -192,7 +192,7 @@ class CustomTransformerModel(PreTrainedModel):
         self.llama.resize_token_embeddings(config.vocab_size)
 
         for i, layer in enumerate(self.llama.model.layers):
-            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='
+            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='bidirectional')
 
         # Freeze Llama to retain pre-trained knowledge
         for param in self.llama.parameters():
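`BidirectionalLlamaAttention` itself is defined earlier in llama_diffusion_model.py and is not part of this hunk. As a rough sketch of what the `masking='bidirectional'` flag changes, the snippet below contrasts a causal additive attention mask with a bidirectional one in plain PyTorch; the function name and the exact mask semantics are illustrative assumptions based on this diff, not the repo's actual implementation.

```python
import torch

def build_attention_mask(seq_len: int, masking: str) -> torch.Tensor:
    """Additive attention mask, added to the score matrix before softmax.

    Assumed semantics (hypothetical, mirroring the masking= argument above):
    'causal'        -> token i may only attend to tokens j <= i
    'bidirectional' -> every token may attend to every other token
    """
    if masking == "bidirectional":
        # Nothing is blocked: an all-zero additive mask.
        return torch.zeros(seq_len, seq_len)
    if masking == "causal":
        # Future positions (strict upper triangle) get -inf,
        # so softmax assigns them zero attention weight.
        mask = torch.full((seq_len, seq_len), float("-inf"))
        return torch.triu(mask, diagonal=1)
    raise ValueError(f"unknown masking mode: {masking!r}")

if __name__ == "__main__":
    print(build_attention_mask(3, "causal"))
    # tensor([[0., -inf, -inf],
    #         [0.,   0., -inf],
    #         [0.,   0.,   0.]])
    print(build_attention_mask(3, "bidirectional"))
    # tensor([[0., 0., 0.],
    #         [0., 0., 0.],
    #         [0., 0., 0.]])
```

Switching every layer to a fully visible mask is consistent with the diffusion setup the file name suggests: a diffusion-style LM predicts tokens at arbitrary positions in parallel, so each position needs context from both directions rather than only from the left, while the frozen Llama weights preserve the pre-trained knowledge.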