Spaces: Running on Zero
Turn autocast back on
Browse files
llama_diffusion_model.py — CHANGED (+8 −8)
@@ -137,14 +137,14 @@ class CustomTransformerModel(PreTrainedModel):
 137         attention_mask = attention_mask.to(dtype=torch.float32)  # required for SDPA and Flash attention
 138
 139
-140..147    (eight removed lines — their content was not captured in this page extraction;
-            presumably the same `self.llama(...)` call without the autocast wrapper, per the
-            commit title "Turn autocast back on" — verify against the repository history)
+140         with autocast("cuda", dtype=torch.float16):
+141             outputs = self.llama(
+142                 input_ids,
+143                 attention_mask=attention_mask,
+144                 output_hidden_states=True,
+145                 use_cache=False,
+146                 **kwargs
+147             )
 148
 149         logits = outputs.logits[:, :, :self.config.vocab_size].view(batch_size, seq_len, self.config.vocab_size)
 150

(NOTE: indentation of the added lines was lost in the original rendering and has been
restored conventionally — `with` at method-body level, the call one level deeper.)