import os
from typing import Optional, Tuple

import torch
from torch import nn
from torch.nn import functional as F


class LiteMLA(nn.Module):
    r"""Lightweight multiscale linear attention."""

    PAD_VAL = 1

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        heads: Optional[int] = None,
        heads_ratio: float = 1.0,
        dim: int = 32,
        kernel_func: str = "relu",
        scales: Optional[Tuple[int]] = (5,),
        eps: float = 1e-15,
        use_bias: bool = False,
        norm=(None, "bn2d"),
        act=(None, None),
    ):
        super().__init__()
        heads = heads or int(out_dim // dim * heads_ratio)

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.heads = heads
        self.dim = dim
        self.scales = scales
        self.eps = eps

        # Multiscale aggregation is disabled in this variant; only the base
        # linear-attention path is used.
        self.aggreg = None
        scales = ()
        # Non-negative ReLU feature map; the `kernel_func` argument is accepted
        # but only the ReLU kernel is instantiated here.
        self.kernel_func = nn.ReLU(inplace=False)

        self.qkv = nn.Linear(in_dim, in_dim * 3, bias=use_bias)
        self.proj = nn.Linear(out_dim, out_dim)

    # Optionally run the linear-attention matmuls under autocast; controlled by
    # the AUTOCAST_LINEAR_ATTN environment variable, read once at class definition.
    @torch.cuda.amp.autocast(enabled=os.environ.get("AUTOCAST_LINEAR_ATTN", "false") == "true")
    def attn_matmul(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
        # Apply the non-negative kernel feature map to queries and keys.
        q = self.kernel_func(q)  # (B, heads, head_dim, N)
        k = self.kernel_func(k)  # (B, heads, N, head_dim)

        # Optionally force fp32 to avoid overflow/NaNs in the accumulations.
        use_fp32_attention = getattr(self, "fp32_attention", False)
        if use_fp32_attention:
            q, k, v = q.float(), k.float(), v.float()

        # Pad v with a row of ones so the same two matmuls also accumulate the
        # normalizer: out[:, :, :-1] holds the weighted values, out[:, :, -1:]
        # holds the per-token normalization term.
        v = F.pad(v, (0, 0, 0, 1), mode="constant", value=LiteMLA.PAD_VAL)
        vk = torch.matmul(v, k)    # (B, heads, head_dim + 1, head_dim)
        out = torch.matmul(vk, q)  # (B, heads, head_dim + 1, N)
        if out.dtype in [torch.float16, torch.bfloat16]:
            out = out.float()
        out = out[:, :, :-1] / (out[:, :, -1:] + self.eps)

        return out

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        B, N, C = x.shape
        # Single projection to q, k, v: (B, N, 3C) -> (B, 3, C, N).
        qkv = self.qkv(x).reshape(B, N, 3, C).permute(0, 2, 3, 1)

        q, k, v = qkv.unbind(1)  # each (B, C, N)
        dtype = q.dtype

        # Split channels into heads of size self.dim.
        q = q.reshape(B, C // self.dim, self.dim, N)                    # (B, heads, head_dim, N)
        k = k.reshape(B, C // self.dim, self.dim, N).transpose(-1, -2)  # (B, heads, N, head_dim)
        v = v.reshape(B, C // self.dim, self.dim, N)                    # (B, heads, head_dim, N)

        out = self.attn_matmul(q, k, v).to(dtype)

        # Merge heads back into channels and project.
        out = out.view(B, C, N).permute(0, 2, 1)  # (B, N, C)
        out = self.proj(out)

        return out

    @property
    def module_str(self) -> str:
        _str = type(self).__name__ + "("
        eps = f"{self.eps:.1E}"
        _str += f"i={self.in_dim},o={self.out_dim},h={self.heads},d={self.dim},eps={eps}"
        return _str + ")"

    def __repr__(self):
        return f"EPS{self.eps}-" + super().__repr__()
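

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): shape-check the
# block on random tokens. The sizes below are arbitrary assumptions; the only
# constraints assumed are that in_dim is divisible by `dim` and, because
# `proj` maps out_dim -> out_dim, that in_dim == out_dim.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    attn = LiteMLA(in_dim=128, out_dim=128, dim=32)  # 4 heads of size 32
    x = torch.randn(2, 64, 128)                      # (batch, tokens, channels)
    y = attn(x)
    print(y.shape)                                   # torch.Size([2, 64, 128])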