Fix: flash_attention_2 mask
modeling_eurobert.py  +11 -11
@@ -26,15 +26,15 @@ import torch
 from torch import nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 
-from
-from
-from
-from
-from
-from
-from
-from
-from
+from ...activations import ACT2FN
+from ...cache_utils import Cache, StaticCache
+from ...modeling_attn_mask_utils import AttentionMaskConverter
+from ...modeling_flash_attention_utils import FlashAttentionKwargs
+from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPast, MaskedLMOutput, SequenceClassifierOutput
+from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS
+from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
+from ...processing_utils import Unpack
+from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
 from .configuration_eurobert import EuroBertConfig
 
 
@@ -224,7 +224,7 @@ EUROBERT_START_DOCSTRING = r"""
 
 
 @add_start_docstrings(
-    "The bare
+    "The bare EuroBERT Model outputting raw hidden-states without any specific head on top.",
     EUROBERT_START_DOCSTRING,
 )
 class EuroBertPreTrainedModel(PreTrainedModel):
@@ -523,7 +523,7 @@ class EuroBertModel(EuroBertPreTrainedModel):
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
 
-        if attention_mask is not None:
+        if attention_mask is not None and self.config._attn_implementation != "flash_attention_2":
             mask = self.mask_converter.to_4d(attention_mask, attention_mask.shape[1], inputs_embeds.dtype)
         else:
             mask = None