3DTopia2

Runtime error

HongFangzhou

add source codes

bc2085d over 1 year ago

18.9 kB

	# adopted from
	# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py
	# and
	# https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
	# and
	# https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py
	#
	# thanks!

	# zero123/zero123/ldm/modules/diffusionmodules/util.py
	import os
	import math
	import torch
	import torch.nn as nn
	import numpy as np
	from einops import repeat


	def get_obj_from_str(string, reload=False):
	    """
	    Resolve a dotted path such as ``"package.module.Name"`` to the object it names.
	    :param string: dotted path; everything before the last dot is imported as a
	                   module and the final component is looked up on that module.
	    :param reload: if True, reload the module before the attribute lookup.
	    :return: the attribute found on the imported module.
	    """
	    # Imported locally so the helper does not rely on the file's import block.
	    import importlib

	    module_name, attr_name = string.rsplit(".", 1)
	    module = importlib.import_module(module_name)
	    if reload:
	        module = importlib.reload(module)
	    return getattr(module, attr_name)


	def instantiate_from_config(config):
	    """
	    Build an object from a config mapping with a `target` entry.
	    :param config: mapping with a dotted-path `target` and optional `params`
	                   (keyword arguments for the target), or one of the sentinel
	                   strings `__is_first_stage__` / `__is_unconditional__`.
	    :return: `target(**params)`, or None for either sentinel string.
	    :raises KeyError: if `config` has no `target` and is not a sentinel.
	    """
	    if "target" not in config:
	        # The two sentinels mean "nothing to instantiate here".
	        if config in ("__is_first_stage__", "__is_unconditional__"):
	            return None
	        raise KeyError("Expected key `target` to instantiate.")
	    return get_obj_from_str(config["target"])(**config.get("params", dict()))


	def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
	    """
	    Create the per-timestep beta values of a diffusion noise schedule.
	    :param schedule: one of "linear", "cosine", "sqrt_linear" or "sqrt".
	    :param n_timestep: number of betas to produce.
	    :param linear_start: first value of the linspace-based schedules.
	    :param linear_end: last value of the linspace-based schedules.
	    :param cosine_s: offset added to the normalized timestep of the cosine schedule.
	    :return: float64 numpy array of length `n_timestep`.
	    :raises ValueError: if `schedule` is not a known name.
	    """
	    if schedule == "linear":
	        # Interpolate linearly in sqrt(beta) space, then square back.
	        betas = (
	            torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
	        )

	    elif schedule == "cosine":
	        timesteps = (
	            torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
	        )
	        alphas = timesteps / (1 + cosine_s) * np.pi / 2
	        alphas = torch.cos(alphas).pow(2)
	        alphas = alphas / alphas[0]
	        betas = 1 - alphas[1:] / alphas[:-1]
	        # Clamp with torch: `betas` is a tensor, and the trailing `.numpy()`
	        # below requires it to stay one.
	        betas = torch.clamp(betas, min=0, max=0.999)

	    elif schedule == "sqrt_linear":
	        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
	    elif schedule == "sqrt":
	        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
	    else:
	        raise ValueError(f"schedule '{schedule}' unknown.")
	    return betas.numpy()


	def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True):
	    """
	    Select the subset of DDPM timesteps a DDIM sampler will visit.
	    :param ddim_discr_method: 'uniform' (constant stride) or 'quad' (quadratic spacing).
	    :param num_ddim_timesteps: requested number of DDIM steps.
	    :param num_ddpm_timesteps: number of timesteps of the underlying DDPM schedule.
	    :param verbose: if True, print the selected timesteps.
	    :return: integer numpy array of the selected timesteps, each shifted by +1.
	    :raises ValueError: for 'uniform' when the integer stride would be 0, i.e.
	                        more DDIM steps are requested than DDPM steps exist.
	    :raises NotImplementedError: for an unknown discretization method.
	    """
	    if ddim_discr_method == 'uniform':
	        c = num_ddpm_timesteps // num_ddim_timesteps
	        if c == 0:
	            # A zero stride would otherwise surface as an opaque range() error.
	            raise ValueError(
	                f'num_ddim_timesteps ({num_ddim_timesteps}) must not exceed '
	                f'num_ddpm_timesteps ({num_ddpm_timesteps}) for uniform discretization'
	            )
	        # NOTE: when the division is not exact this yields more than
	        # num_ddim_timesteps entries (the stride is rounded down).
	        ddim_timesteps = np.arange(0, num_ddpm_timesteps, c)
	    elif ddim_discr_method == 'quad':
	        ddim_timesteps = ((np.linspace(0, np.sqrt(num_ddpm_timesteps * .8), num_ddim_timesteps)) ** 2).astype(int)
	    else:
	        raise NotImplementedError(f'There is no ddim discretization method called "{ddim_discr_method}"')

	    # add one to get the final alpha values right (the ones from first scale to data during sampling)
	    steps_out = ddim_timesteps + 1
	    if verbose:
	        print(f'Selected timesteps for ddim sampler: {steps_out}')
	    return steps_out


	def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True):
	    """
	    Compute the DDIM sigma schedule together with the alphas it is derived from.
	    :param alphacums: array indexed by timestep (cumulative alpha products).
	    :param ddim_timesteps: integer array of the timesteps used by the sampler.
	    :param eta: scale factor applied to the resulting sigmas.
	    :param verbose: if True, print the selected alphas and sigmas.
	    :return: tuple `(sigmas, alphas, alphas_prev)`.
	    """
	    # Alphas at the selected steps, and the same sequence shifted by one step
	    # with alphacums[0] as the first "previous" value.
	    alphas = alphacums[ddim_timesteps]
	    earlier = alphacums[ddim_timesteps[:-1]].tolist()
	    alphas_prev = np.asarray([alphacums[0]] + earlier)

	    # Variance formula from https://arxiv.org/abs/2010.02502
	    inside_sqrt = (1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)
	    sigmas = eta * np.sqrt(inside_sqrt)

	    if not verbose:
	        return sigmas, alphas, alphas_prev

	    print(f'Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}')
	    print(f'For the chosen value of eta, which is {eta}, '
	          f'this results in the following sigma_t schedule for ddim sampler {sigmas}')
	    return sigmas, alphas, alphas_prev


	def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
	    """
	    Discretize a continuous alpha_bar(t) function into a beta schedule.
	    `alpha_bar` gives the cumulative product of (1 - beta) for t in [0, 1].
	    :param num_diffusion_timesteps: the number of betas to produce.
	    :param alpha_bar: callable taking t in [0, 1] and returning the cumulative
	                      product of (1 - beta) up to that point.
	    :param max_beta: upper bound applied to every beta; values below 1
	                     prevent singularities.
	    :return: numpy array with `num_diffusion_timesteps` betas.
	    """
	    n = num_diffusion_timesteps
	    # Each beta is 1 - alpha_bar(end of interval) / alpha_bar(start of interval).
	    return np.array([
	        min(1 - alpha_bar((step + 1) / n) / alpha_bar(step / n), max_beta)
	        for step in range(n)
	    ])


	def extract_into_tensor(a, t, x_shape):
	    """
	    Gather the entries of `a` at indices `t` and reshape them for broadcasting.
	    :param a: tensor to read from (gathered along its last dimension).
	    :param t: integer index tensor; its first dimension is the batch size.
	    :param x_shape: shape of the tensor the result will be broadcast against.
	    :return: tensor of shape `(b, 1, ..., 1)` with `len(x_shape)` dimensions.
	    """
	    b, *_ = t.shape
	    out = a.gather(-1, t)
	    # One singleton axis per non-batch dimension of x_shape.
	    return out.reshape(b, *((1,) * (len(x_shape) - 1)))


	def checkpoint(func, inputs, params, flag):
	    """
	    Call `func`, optionally through gradient checkpointing.
	    With checkpointing, intermediate activations are not cached, which lowers
	    memory use at the cost of extra compute in the backward pass.
	    :param func: the function to evaluate.
	    :param inputs: the argument sequence to pass to `func`.
	    :param params: a sequence of parameters `func` depends on but does not
	                   explicitly take as arguments.
	    :param flag: if False, disable gradient checkpointing.
	    :return: whatever `func(*inputs)` returns.
	    """
	    if not flag:
	        # Plain call; `params` is not consumed on this path.
	        return func(*inputs)
	    all_args = tuple(inputs) + tuple(params)
	    return CheckpointFunction.apply(func, len(inputs), *all_args)


	class CheckpointFunction(torch.autograd.Function):
	    """
	    Autograd function that runs `run_function` without recording a graph in
	    the forward pass and re-runs it with gradients enabled in the backward pass.
	    """

	    @staticmethod
	    def forward(ctx, run_function, length, *args):
	        """
	        :param ctx: autograd context; used to stash the function and its arguments.
	        :param run_function: the callable to evaluate.
	        :param length: how many leading entries of `args` are passed to `run_function`.
	        :param args: the function inputs followed by the extra parameters.
	        :return: the output of `run_function`, computed under `torch.no_grad()`.
	        """
	        ctx.run_function = run_function
	        # First `length` args are the call inputs; the rest are only needed
	        # as gradient targets in backward().
	        ctx.input_tensors = list(args[:length])
	        ctx.input_params = list(args[length:])

	        with torch.no_grad():
	            output_tensors = ctx.run_function(*ctx.input_tensors)
	        return output_tensors

	    @staticmethod
	    def backward(ctx, *output_grads):
	        """
	        Re-run the function with gradients enabled and differentiate through it.
	        :param output_grads: gradients with respect to the forward outputs.
	        :return: `(None, None)` for `run_function` and `length`, followed by the
	                 gradients for the inputs and then the parameters.
	        """
	        # Fresh leaf tensors so the recomputation builds its own graph.
	        ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
	        with torch.enable_grad():
	            # Fixes a bug where the first op in run_function modifies the
	            # Tensor storage in place, which is not allowed for detach()'d
	            # Tensors.
	            shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
	            output_tensors = ctx.run_function(*shallow_copies)
	        input_grads = torch.autograd.grad(
	            output_tensors,
	            ctx.input_tensors + ctx.input_params,
	            output_grads,
	            # Inputs/parameters that do not affect the output get a None gradient.
	            allow_unused=True,
	        )
	        # Drop the references held on ctx and the recomputed outputs.
	        del ctx.input_tensors
	        del ctx.input_params
	        del output_tensors
	        return (None, None) + input_grads


	def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False):
	    """
	    Build sinusoidal embeddings for a batch of timesteps.
	    :param timesteps: a 1-D Tensor of N indices, one per batch element.
	                      These may be fractional.
	    :param dim: the dimension of the output.
	    :param max_period: controls the minimum frequency of the embeddings.
	    :param repeat_only: if True, skip the sinusoids and just repeat each
	                        timestep `dim` times.
	    :return: an [N x dim] Tensor of positional embeddings.
	    """
	    if repeat_only:
	        return repeat(timesteps, 'b -> b d', d=dim)

	    half = dim // 2
	    exponents = -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
	    freqs = torch.exp(exponents).to(device=timesteps.device)
	    args = timesteps[:, None].float() * freqs[None]
	    embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
	    if dim % 2:
	        # Odd dim: pad with a single zero column to reach the requested width.
	        padding = torch.zeros_like(embedding[:, :1])
	        embedding = torch.cat([embedding, padding], dim=-1)
	    return embedding


	def zero_module(module):
	    """
	    Set every parameter of `module` to zero in place.
	    :param module: the module whose parameters are zeroed.
	    :return: the same module object.
	    """
	    for param in module.parameters():
	        # detach() shares storage, so the in-place write changes the parameter.
	        detached = param.detach()
	        detached.zero_()
	    return module


	def scale_module(module, scale):
	    """
	    Multiply every parameter of `module` by `scale` in place.
	    :param module: the module whose parameters are scaled.
	    :param scale: the multiplier.
	    :return: the same module object.
	    """
	    for param in module.parameters():
	        # detach() shares storage, so the in-place write changes the parameter.
	        detached = param.detach()
	        detached.mul_(scale)
	    return module


	def mean_flat(tensor):
	    """
	    Average over every dimension except the first (batch) one.
	    :param tensor: the input tensor.
	    :return: tensor of per-sample means.
	    """
	    non_batch_dims = list(range(1, len(tensor.shape)))
	    return tensor.mean(dim=non_batch_dims)


	def normalization(channels):
	    """
	    Build the standard normalization layer: a GroupNorm32 with 32 groups.
	    :param channels: number of input channels.
	    :return: an nn.Module for normalization.
	    """
	    num_groups = 32
	    return GroupNorm32(num_groups, channels)


	# PyTorch 1.7 has SiLU, but we support PyTorch 1.5.
	class SiLU(nn.Module):
	    """SiLU activation: x * sigmoid(x)."""

	    def forward(self, x):
	        gate = torch.sigmoid(x)
	        return x * gate


	class GroupNorm32(nn.GroupNorm):
	    """GroupNorm computed on a float32 copy of the input, returned in the input's dtype."""

	    def forward(self, x):
	        input_dtype = x.dtype
	        normalized = super().forward(x.float())
	        return normalized.type(input_dtype)

	def conv_nd(dims, *args, **kwargs):
	    """
	    Create a 1D, 2D, or 3D convolution module.
	    :param dims: spatial dimensionality (1, 2 or 3).
	    :param args: positional arguments forwarded to the nn.ConvNd constructor.
	    :param kwargs: keyword arguments forwarded to the nn.ConvNd constructor.
	    :return: the constructed convolution module.
	    :raises ValueError: if `dims` is not 1, 2 or 3.
	    """
	    if dims == 1:
	        return nn.Conv1d(*args, **kwargs)
	    elif dims == 2:
	        return nn.Conv2d(*args, **kwargs)
	    elif dims == 3:
	        return nn.Conv3d(*args, **kwargs)
	    raise ValueError(f"unsupported dimensions: {dims}")


	def linear(*args, **kwargs):
	    """
	    Create a linear module.
	    :param args: positional arguments forwarded to nn.Linear.
	    :param kwargs: keyword arguments forwarded to nn.Linear.
	    :return: the constructed nn.Linear.
	    """
	    return nn.Linear(*args, **kwargs)


	def avg_pool_nd(dims, *args, **kwargs):
	    """
	    Create a 1D, 2D, or 3D average pooling module.
	    :param dims: spatial dimensionality (1, 2 or 3).
	    :param args: positional arguments forwarded to the nn.AvgPoolNd constructor.
	    :param kwargs: keyword arguments forwarded to the nn.AvgPoolNd constructor.
	    :return: the constructed pooling module.
	    :raises ValueError: if `dims` is not 1, 2 or 3.
	    """
	    if dims == 1:
	        return nn.AvgPool1d(*args, **kwargs)
	    elif dims == 2:
	        return nn.AvgPool2d(*args, **kwargs)
	    elif dims == 3:
	        return nn.AvgPool3d(*args, **kwargs)
	    raise ValueError(f"unsupported dimensions: {dims}")


	class HybridConditioner(nn.Module):
	    """
	    Holds two conditioners built from configs, one for concatenation
	    conditioning and one for cross-attention conditioning.
	    """

	    def __init__(self, c_concat_config, c_crossattn_config):
	        """
	        :param c_concat_config: config passed to `instantiate_from_config` for the concat conditioner.
	        :param c_crossattn_config: config passed to `instantiate_from_config` for the cross-attention conditioner.
	        """
	        super().__init__()
	        self.concat_conditioner = instantiate_from_config(c_concat_config)
	        self.crossattn_conditioner = instantiate_from_config(c_crossattn_config)

	    def forward(self, c_concat, c_crossattn):
	        """
	        Run each input through its conditioner.
	        :return: dict with keys 'c_concat' and 'c_crossattn', each a one-element list.
	        """
	        concat_out = self.concat_conditioner(c_concat)
	        crossattn_out = self.crossattn_conditioner(c_crossattn)
	        return {'c_concat': [concat_out], 'c_crossattn': [crossattn_out]}


	def noise_like(shape, device, repeat=False):
	    """
	    Sample standard-normal noise of the given shape.
	    :param shape: target shape; the first entry is the batch size.
	    :param device: device on which the noise is created.
	    :param repeat: if True, draw one sample of shape `(1, *shape[1:])` and
	                   tile it along the batch axis so every batch element is identical.
	    :return: tensor of shape `shape`.
	    """
	    if repeat:
	        single = torch.randn((1, *shape[1:]), device=device)
	        # Tile along the batch axis only; every other axis is repeated once.
	        return single.repeat(shape[0], *((1,) * (len(shape) - 1)))
	    return torch.randn(shape, device=device)


	# zero123/zero123/ldm/modules/attention.py
	from inspect import isfunction
	import math
	import torch
	import torch.nn.functional as F
	from torch import nn, einsum
	from einops import rearrange, repeat


	def exists(val):
	    """Return True unless `val` is None."""
	    if val is None:
	        return False
	    return True


	def uniq(arr):
	    """Return a dict-keys view of the distinct elements of `arr`, in first-seen order."""
	    seen = dict.fromkeys(arr, True)
	    return seen.keys()


	def default(val, d):
	    """
	    Return `val` unless it is None, else the fallback `d`.
	    If `d` is a plain function it is called and its result is returned.
	    """
	    if val is not None:
	        return val
	    if isfunction(d):
	        return d()
	    return d


	def max_neg_value(t):
	    """Return the most negative finite value representable in the dtype of `t`."""
	    dtype_info = torch.finfo(t.dtype)
	    return -dtype_info.max


	def init_(tensor):
	    """
	    Fill `tensor` in place with uniform values in [-1/sqrt(d), 1/sqrt(d)],
	    where d is the size of its last dimension.
	    :return: the same tensor.
	    """
	    bound = 1 / math.sqrt(tensor.shape[-1])
	    tensor.uniform_(-bound, bound)
	    return tensor


	# feedforward
	class GEGLU(nn.Module):
	    """Gated projection: one linear layer whose output is split into value and GELU gate."""

	    def __init__(self, dim_in, dim_out):
	        """
	        :param dim_in: input feature size.
	        :param dim_out: output feature size (the projection produces twice this).
	        """
	        super().__init__()
	        self.proj = nn.Linear(dim_in, dim_out * 2)

	    def forward(self, x):
	        projected = self.proj(x)
	        # First half of the last axis is the value, second half the gate.
	        value, gate = projected.chunk(2, dim=-1)
	        return value * F.gelu(gate)


	class FeedForward(nn.Module):
	    """Input projection (Linear + GELU, or GEGLU), dropout, then an output Linear."""

	    def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
	        """
	        :param dim: input feature size.
	        :param dim_out: output feature size; falls back to `dim` when None.
	        :param mult: hidden size is `int(dim * mult)`.
	        :param glu: if True, use GEGLU for the input projection.
	        :param dropout: dropout probability between the two projections.
	        """
	        super().__init__()
	        inner_dim = int(dim * mult)
	        if dim_out is None:
	            dim_out = dim

	        if glu:
	            project_in = GEGLU(dim, inner_dim)
	        else:
	            project_in = nn.Sequential(
	                nn.Linear(dim, inner_dim),
	                nn.GELU(),
	            )

	        layers = [
	            project_in,
	            nn.Dropout(dropout),
	            nn.Linear(inner_dim, dim_out),
	        ]
	        self.net = nn.Sequential(*layers)

	    def forward(self, x):
	        return self.net(x)


	def zero_module(module):
	    """
	    Set every parameter of `module` to zero in place.
	    :param module: the module whose parameters are zeroed.
	    :return: the same module object.
	    """
	    for param in module.parameters():
	        # detach() shares storage, so the in-place write changes the parameter.
	        detached = param.detach()
	        detached.zero_()
	    return module


	def Normalize(in_channels):
	    """
	    Build an affine GroupNorm with 32 groups and eps=1e-6.
	    :param in_channels: number of channels to normalize.
	    :return: the GroupNorm module.
	    """
	    settings = dict(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
	    return torch.nn.GroupNorm(**settings)


	class LinearAttention(nn.Module):
	    """Multi-head linear attention over the flattened spatial positions of a 4-D input."""

	    def __init__(self, dim, heads=4, dim_head=32):
	        """
	        :param dim: number of input and output channels.
	        :param heads: number of attention heads.
	        :param dim_head: channels per head.
	        """
	        super().__init__()
	        self.heads = heads
	        hidden_dim = dim_head * heads
	        # One 1x1 conv produces q, k and v stacked along the channel axis.
	        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
	        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

	    def forward(self, x):
	        _, _, height, width = x.shape
	        stacked = self.to_qkv(x)
	        q, k, v = rearrange(stacked, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3)
	        # Softmax over the flattened spatial positions of the keys.
	        k = k.softmax(dim=-1)
	        context = torch.einsum('bhdn,bhen->bhde', k, v)
	        attended = torch.einsum('bhde,bhdn->bhen', context, q)
	        attended = rearrange(attended, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=height, w=width)
	        return self.to_out(attended)


	class SpatialSelfAttention(nn.Module):
	    """Single-head self-attention over spatial positions, with a residual connection."""

	    def __init__(self, in_channels):
	        """
	        :param in_channels: number of channels of the input (and output).
	        """
	        super().__init__()
	        self.in_channels = in_channels

	        self.norm = Normalize(in_channels)
	        # Four 1x1 convolutions: query, key, value and output projection.
	        conv_kwargs = dict(kernel_size=1, stride=1, padding=0)
	        self.q = torch.nn.Conv2d(in_channels, in_channels, **conv_kwargs)
	        self.k = torch.nn.Conv2d(in_channels, in_channels, **conv_kwargs)
	        self.v = torch.nn.Conv2d(in_channels, in_channels, **conv_kwargs)
	        self.proj_out = torch.nn.Conv2d(in_channels, in_channels, **conv_kwargs)

	    def forward(self, x):
	        normed = self.norm(x)
	        q = self.q(normed)
	        k = self.k(normed)
	        v = self.v(normed)

	        # Attention weights between all pairs of spatial positions.
	        b, c, h, w = q.shape
	        q = rearrange(q, 'b c h w -> b (h w) c')
	        k = rearrange(k, 'b c h w -> b c (h w)')
	        weights = torch.einsum('bij,bjk->bik', q, k)

	        weights = weights * (int(c)**(-0.5))
	        weights = torch.nn.functional.softmax(weights, dim=2)

	        # Weighted sum of the values, reshaped back to the spatial layout.
	        v = rearrange(v, 'b c h w -> b c (h w)')
	        weights = rearrange(weights, 'b i j -> b j i')
	        attended = torch.einsum('bij,bjk->bik', v, weights)
	        attended = rearrange(attended, 'b c (h w) -> b c h w', h=h)
	        attended = self.proj_out(attended)

	        return x + attended


	class CrossAttention(nn.Module):
	    """
	    Multi-head attention from `x` to `context`; with no context it attends
	    to `x` itself.
	    """

	    def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.):
	        """
	        :param query_dim: feature size of the query input (and of the output).
	        :param context_dim: feature size of the context; falls back to `query_dim`.
	        :param heads: number of attention heads.
	        :param dim_head: feature size per head.
	        :param dropout: dropout probability applied after the output projection.
	        """
	        super().__init__()
	        inner_dim = dim_head * heads
	        context_dim = default(context_dim, query_dim)

	        self.scale = dim_head ** -0.5
	        self.heads = heads

	        self.to_q = nn.Linear(query_dim, inner_dim, bias=False)
	        self.to_k = nn.Linear(context_dim, inner_dim, bias=False)
	        self.to_v = nn.Linear(context_dim, inner_dim, bias=False)

	        self.to_out = nn.Sequential(
	            nn.Linear(inner_dim, query_dim),
	            nn.Dropout(dropout),
	        )

	    def forward(self, x, context=None, mask=None):
	        """
	        :param x: query input.
	        :param context: key/value input; `x` is used when None.
	        :param mask: optional mask; positions where it is False are filled
	                     with the most negative finite value before the softmax.
	        :return: attended features projected back to `query_dim`.
	        """
	        num_heads = self.heads

	        q = self.to_q(x)
	        context = default(context, x)
	        k = self.to_k(context)
	        v = self.to_v(context)

	        # Fold the heads into the batch axis.
	        def split_heads(t):
	            return rearrange(t, 'b n (h d) -> (b h) n d', h=num_heads)

	        q, k, v = split_heads(q), split_heads(k), split_heads(v)

	        sim = einsum('b i d, b j d -> b i j', q, k) * self.scale

	        if exists(mask):
	            mask = rearrange(mask, 'b ... -> b (...)')
	            fill_value = -torch.finfo(sim.dtype).max
	            mask = repeat(mask, 'b j -> (b h) () j', h=num_heads)
	            sim.masked_fill_(~mask, fill_value)

	        attn = sim.softmax(dim=-1)

	        out = einsum('b i j, b j d -> b i d', attn, v)
	        # Unfold the heads back into the feature axis.
	        out = rearrange(out, '(b h) n d -> b n (h d)', h=num_heads)
	        return self.to_out(out)


	class BasicTransformerBlock(nn.Module):
	    """
	    Transformer block: attention, a second (context) attention and a
	    feed-forward, each applied to a LayerNorm'd input and added back residually.
	    """

	    def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True,
	                 disable_self_attn=False):
	        """
	        :param dim: feature size of the block.
	        :param n_heads: number of attention heads.
	        :param d_head: feature size per head.
	        :param dropout: dropout probability used by the attention and feed-forward layers.
	        :param context_dim: feature size of the context.
	        :param gated_ff: if True, the feed-forward uses its gated (`glu`) projection.
	        :param checkpoint: flag passed to `checkpoint()` in `forward`.
	        :param disable_self_attn: if True, the first attention also attends to the context.
	        """
	        super().__init__()
	        self.disable_self_attn = disable_self_attn
	        # attn1 is a self-attention unless disable_self_attn is set.
	        attn1_context_dim = context_dim if self.disable_self_attn else None
	        self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout,
	                                    context_dim=attn1_context_dim)
	        self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
	        # attn2 is a self-attention if context is none.
	        self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim,
	                                    heads=n_heads, dim_head=d_head, dropout=dropout)
	        self.norm1 = nn.LayerNorm(dim)
	        self.norm2 = nn.LayerNorm(dim)
	        self.norm3 = nn.LayerNorm(dim)
	        self.checkpoint = checkpoint

	    def forward(self, x, context=None):
	        """Run `_forward` through `checkpoint()`, passing this block's parameters."""
	        inputs = (x, context)
	        return checkpoint(self._forward, inputs, self.parameters(), self.checkpoint)

	    def _forward(self, x, context=None):
	        attn1_context = context if self.disable_self_attn else None
	        x = self.attn1(self.norm1(x), context=attn1_context) + x
	        x = self.attn2(self.norm2(x), context=context) + x
	        x = self.ff(self.norm3(x)) + x
	        return x


	class SpatialTransformer(nn.Module):
	    """
	    Transformer block for image-like data.
	    The input is normalized, projected and reshaped to b, t, d; the
	    transformer blocks are applied; the result is reshaped back to an image,
	    projected and added to the input.
	    """

	    def __init__(self, in_channels, n_heads, d_head,
	                 depth=1, dropout=0., context_dim=None,
	                 disable_self_attn=False):
	        """
	        :param in_channels: number of channels of the input (and output).
	        :param n_heads: number of attention heads.
	        :param d_head: feature size per head.
	        :param depth: number of transformer blocks.
	        :param dropout: dropout probability passed to each block.
	        :param context_dim: feature size of the context passed to each block.
	        :param disable_self_attn: passed through to each block.
	        """
	        super().__init__()
	        self.in_channels = in_channels
	        inner_dim = n_heads * d_head
	        self.norm = Normalize(in_channels)

	        self.proj_in = nn.Conv2d(in_channels, inner_dim, kernel_size=1, stride=1, padding=0)

	        blocks = [
	            BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim,
	                                  disable_self_attn=disable_self_attn)
	            for _ in range(depth)
	        ]
	        self.transformer_blocks = nn.ModuleList(blocks)

	        # The output projection starts at zero.
	        out_conv = nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)
	        self.proj_out = zero_module(out_conv)

	    def forward(self, x, context=None):
	        # note: if no context is given, cross-attention defaults to self-attention
	        _, _, height, width = x.shape
	        residual = x
	        x = self.norm(x)
	        x = self.proj_in(x)
	        x = rearrange(x, 'b c h w -> b (h w) c').contiguous()
	        for block in self.transformer_blocks:
	            x = block(x, context=context)
	        x = rearrange(x, 'b (h w) c -> b c h w', h=height, w=width).contiguous()
	        x = self.proj_out(x)
	        return x + residual


	def exists(x):
	    """Return True unless `x` is None."""
	    return not (x is None)