"""Dtype-handling monkey patches: override the forward methods of several
diffusers/torch normalization layers and disable Accelerate's cast of model
outputs back to float32."""

import torch

import accelerate.accelerator

from diffusers.models.normalization import RMSNorm, LayerNorm, FP32LayerNorm, AdaLayerNormContinuous
|
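# Replace Accelerate's convert_outputs_to_fp32 helper with an identity so models
# prepared under mixed precision are not forced to return float32 outputs.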
accelerate.accelerator.convert_outputs_to_fp32 = lambda x: x
|
|
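# LayerNorm: run the usual layer_norm, then cast the result back to the input's
# dtype and device in case the computation upcast it (e.g. under autocast).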
def LayerNorm_forward(self, x):
    return torch.nn.functional.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps).to(x)
|
|
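# Patch the diffusers LayerNorm export as well as torch.nn.LayerNorm itself
# (depending on the installed versions, the former may just alias the latter).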
LayerNorm.forward = LayerNorm_forward
torch.nn.LayerNorm.forward = LayerNorm_forward
|
|
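# FP32LayerNorm: normalize in float32 for numerical stability and return the
# result in the original input dtype.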
def FP32LayerNorm_forward(self, x):
    origin_dtype = x.dtype
    return torch.nn.functional.layer_norm(
        x.float(),
        self.normalized_shape,
        self.weight.float() if self.weight is not None else None,
        self.bias.float() if self.bias is not None else None,
        self.eps,
    ).to(origin_dtype)
|
|
FP32LayerNorm.forward = FP32LayerNorm_forward
|
|
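# RMSNorm: compute the variance in float32, then apply the optional affine
# weight in the input dtype so the output dtype matches the input.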
def RMSNorm_forward(self, hidden_states):
    input_dtype = hidden_states.dtype
    variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
    hidden_states = hidden_states * torch.rsqrt(variance + self.eps)

    if self.weight is None:
        return hidden_states.to(input_dtype)

    return hidden_states.to(input_dtype) * self.weight.to(input_dtype)
|
|
RMSNorm.forward = RMSNorm_forward
|
|
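# AdaLayerNormContinuous: project the conditioning embedding to a per-sample
# scale and shift and modulate the normalized input; no dtype cast is forced
# on the conditioning embedding.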
def AdaLayerNormContinuous_forward(self, x, conditioning_embedding):
    emb = self.linear(self.silu(conditioning_embedding))
    scale, shift = emb.chunk(2, dim=1)
    x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :]
    return x
|
|
AdaLayerNormContinuous.forward = AdaLayerNormContinuous_forward
|
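# Usage note: the patches are applied as a side effect of importing this module.
# Because the forwards are replaced on the classes themselves, they take effect
# for existing and future instances; import this module before models are
# prepared or run so the patched behavior is the one in effect.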