"""
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import logging
from typing import Dict
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
# TODO: use shared utils here?
import visualize.ca_body.nn.layers as la
from visualize.ca_body.nn.blocks import tile2d, weights_initializer
logger = logging.getLogger(__name__)
class ShadowUNet(nn.Module):
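    """UNet-style shadow predictor operating on ambient occlusion (AO) maps.

    The input AO map is resized to `shadow_size`, mean-centered with a fixed
    (or optionally trainable) AO mean, passed through a small encoder/decoder
    with skip connections, and the sigmoid shadow prediction is upsampled to
    `uv_size`.
    """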
def __init__(
self,
uv_size,
ao_mean,
shadow_size,
lrelu_slope=0.2,
beta=1.0,
n_dims=64,
interp_mode="bilinear",
biases=True,
trainable_mean=False,
):
super().__init__()
# this is the size of the output
self.uv_size = uv_size
self.shadow_size = shadow_size
ao_mean = F.interpolate(
th.as_tensor(ao_mean)[np.newaxis],
size=(self.shadow_size, self.shadow_size),
)[0]
if not trainable_mean:
# TODO:
self.register_buffer("ao_mean", ao_mean)
else:
self.register_parameter("ao_mean", th.nn.Parameter(ao_mean))
self.depth = 3
self.lrelu_slope = lrelu_slope
self.interp_mode = interp_mode
self.align_corners = None
if interp_mode == "bilinear":
self.align_corners = False
        # n_dims is the base number of channels for the shadow feature maps
# TODO: generate this?
self.n_enc_dims = [
(1, n_dims),
(n_dims, n_dims),
(n_dims, n_dims),
(n_dims, n_dims),
]
self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
logger.debug(f"sizes: {self.sizes}")
self.enc_layers = nn.ModuleList()
for i, size in enumerate(self.sizes):
n_in, n_out = self.n_enc_dims[i]
logger.debug(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
self.enc_layers.append(
nn.Sequential(
la.Conv2dWNUB(
n_in,
n_out,
kernel_size=3,
height=size,
width=size,
stride=1,
padding=1,
),
nn.LeakyReLU(self.lrelu_slope, inplace=True),
)
)
self.n_dec_dims = [
(n_dims, n_dims),
(n_dims * 2, n_dims),
(n_dims * 2, n_dims),
(n_dims * 2, n_dims),
]
self.dec_layers = nn.ModuleList()
for i in range(len(self.sizes)):
size = self.sizes[-i - 1]
n_in, n_out = self.n_dec_dims[i]
logger.debug(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")
self.dec_layers.append(
nn.Sequential(
la.Conv2dWNUB(
n_in,
n_out,
kernel_size=3,
height=size,
width=size,
stride=1,
padding=1,
),
nn.LeakyReLU(self.lrelu_slope, inplace=True),
)
)
self.apply(weights_initializer(self.lrelu_slope))
if biases:
self.shadow_pred = la.Conv2dWNUB(
self.n_dec_dims[-1][-1],
1,
kernel_size=3,
height=self.sizes[0],
width=self.sizes[0],
stride=1,
padding=1,
)
else:
self.shadow_pred = la.Conv2dWN(
self.n_dec_dims[-1][-1],
1,
kernel_size=3,
stride=1,
padding=1,
)
self.shadow_pred.apply(weights_initializer(1.0))
self.beta = beta
def forward(self, ao_map):
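        """Predict a shadow map from an AO map.

        Args:
            ao_map: `[B, 1, H, W]` AO map; resized to `shadow_size` if needed.

        Returns:
            Dict with `shadow_map` (`[B, 1, uv_size, uv_size]`), the (possibly
            resized) `ao_map`, and `shadow_map_lowres` at `shadow_size` resolution.
        """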
# resizing the inputs if necessary
if ao_map.shape[-2:] != (self.shadow_size, self.shadow_size):
ao_map = F.interpolate(ao_map, size=(self.shadow_size, self.shadow_size))
x = ao_map - self.ao_mean
enc_acts = []
# unet enc
for i, layer in enumerate(self.enc_layers):
# TODO: try applying a 1D sparse op?
x = layer(x)
enc_acts.append(x)
# TODO: add this layer elsewhere?
if i < len(self.sizes) - 1:
x = F.interpolate(
x,
scale_factor=0.5,
mode="bilinear",
recompute_scale_factor=True,
align_corners=True,
)
        # decoder with skip connections; the deepest encoder activation is already in x, so it is not re-concatenated
for i, layer in enumerate(self.dec_layers):
if i > 0:
x_prev = enc_acts[-i - 1]
x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
x = th.cat([x, x_prev], dim=1)
x = layer(x)
shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
shadow_map = F.interpolate(
shadow_map_lowres,
(self.uv_size, self.uv_size),
mode=self.interp_mode,
align_corners=self.align_corners,
)
return {
"shadow_map": shadow_map,
"ao_map": ao_map,
"shadow_map_lowres": shadow_map_lowres,
}
class FloorShadowDecoder(nn.Module):
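    """UNet with additive skip connections for predicting a floor shadow from an AO map.

    Five strided-conv downsampling blocks are mirrored by five transposed-conv
    upsampling blocks; the final tanh output is remapped to the [0, 1] range.
    """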
def __init__(
self,
uv_size,
beta=1.0,
):
super().__init__()
# TODO: can we reduce # dims here?
self.down1 = nn.Sequential(la.Conv2dWNUB(1, 64, 256, 256, 4, 2, 1), nn.LeakyReLU(0.2))
self.down2 = nn.Sequential(la.Conv2dWNUB(64, 64, 128, 128, 4, 2, 1), nn.LeakyReLU(0.2))
self.down3 = nn.Sequential(la.Conv2dWNUB(64, 128, 64, 64, 4, 2, 1), nn.LeakyReLU(0.2))
self.down4 = nn.Sequential(la.Conv2dWNUB(128, 256, 32, 32, 4, 2, 1), nn.LeakyReLU(0.2))
self.down5 = nn.Sequential(la.Conv2dWNUB(256, 512, 16, 16, 4, 2, 1), nn.LeakyReLU(0.2))
self.up1 = nn.Sequential(
la.ConvTranspose2dWNUB(512, 256, 32, 32, 4, 2, 1), nn.LeakyReLU(0.2)
)
self.up2 = nn.Sequential(
la.ConvTranspose2dWNUB(256, 128, 64, 64, 4, 2, 1), nn.LeakyReLU(0.2)
)
self.up3 = nn.Sequential(
la.ConvTranspose2dWNUB(128, 64, 128, 128, 4, 2, 1), nn.LeakyReLU(0.2)
)
self.up4 = nn.Sequential(
la.ConvTranspose2dWNUB(64, 64, 256, 256, 4, 2, 1), nn.LeakyReLU(0.2)
)
self.up5 = nn.Sequential(la.ConvTranspose2dWNUB(64, 1, 512, 512, 4, 2, 1))
self.uv_size = uv_size
self.apply(lambda x: la.glorot(x, 0.2))
la.glorot(self.up5, 1.0)
self.beta = beta
def forward(self, aomap: th.Tensor):
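        """Predict a floor shadow map.

        Args:
            aomap: `[B, 1, H, W]` AO map, bilinearly resized to `uv_size` internally.

        Returns:
            Dict with `shadow_map` in [0, 1].
        """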
aomap = F.interpolate(
aomap,
size=(self.uv_size, self.uv_size),
mode="bilinear",
align_corners=True,
)
x2 = self.down1(aomap - 0.5)
x3 = self.down2(x2)
x4 = self.down3(x3)
x5 = self.down4(x4)
x6 = self.down5(x5)
x = self.up1(x6) + x5
x = self.up2(x) + x4
x = self.up3(x) + x3
x = self.up4(x) + x2
        # tanh output remapped from [-1, 1] to [0, 1]; these are final values, not logits
        shadow_map = (th.tanh(self.up5(x) + aomap) + 1.0) / 2.0
        return {"shadow_map": shadow_map}
class ShadowUNet_PoseCond(nn.Module):
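    """ShadowUNet variant conditioned on a (root) pose vector.

    The pose vector is tiled spatially, encoded by a conv block at the
    bottleneck resolution, and concatenated with the deepest encoder features
    before decoding.
    """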
def __init__(
self,
uv_size,
ao_mean,
shadow_size,
# uv_coords, # for bottleneck
# uv_mapping, # for bottleneck
# uv_faces, # for bottleneck
lrelu_slope=0.2,
beta=1.0,
n_dims=64,
n_pose_dims=6, # root orientation only
n_pose_enc_dims=32,
interp_mode="bilinear",
):
super().__init__()
self.uv_size = uv_size
self.register_buffer("ao_mean", th.as_tensor(ao_mean))
# self.register_buffer("uv_coords", th.as_tensor(uv_coords))
# self.register_buffer("uv_mapping", th.as_tensor(uv_mapping))
# self.register_buffer("uv_faces", th.as_tensor(uv_faces))
self.depth = 3
self.lrelu_slope = lrelu_slope
self.interp_mode = interp_mode
        # n_dims is the base number of channels for the shadow feature maps
# TODO: generate this?
self.n_enc_dims = [
(1, n_dims),
(n_dims, n_dims),
(n_dims, n_dims),
(n_dims, n_dims),
]
self.shadow_size = shadow_size
self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
logger.info(f" shadow map size: {self.shadow_size}")
# logger.info(f"sizes: {self.sizes}")
#####
## FC for root pose encoding
self.num_pose_dims = n_pose_dims
self.num_pose_enc_dims = n_pose_enc_dims
self.pose_fc_block = nn.Sequential(
la.LinearWN(self.num_pose_dims, self.num_pose_enc_dims),
nn.LeakyReLU(lrelu_slope),
)
self.pose_conv_block = la.Conv2dWNUB(
in_channels=self.num_pose_dims,
out_channels=self.num_pose_enc_dims,
kernel_size=3,
height=self.sizes[-1],
width=self.sizes[-1],
padding=1,
)
self.enc_layers = nn.ModuleList()
for i, size in enumerate(self.sizes):
n_in, n_out = self.n_enc_dims[i]
# logger.info(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
self.enc_layers.append(
nn.Sequential(
la.Conv2dWNUB(
n_in,
n_out,
kernel_size=3,
height=size,
width=size,
stride=1,
padding=1,
),
nn.LeakyReLU(self.lrelu_slope, inplace=True),
)
)
self.n_dec_dims = [
(n_dims + self.num_pose_enc_dims, n_dims),
(n_dims * 2, n_dims),
(n_dims * 2, n_dims),
(n_dims * 2, n_dims),
]
self.dec_layers = nn.ModuleList()
for i in range(len(self.sizes)):
size = self.sizes[-i - 1]
n_in, n_out = self.n_dec_dims[i]
# logger.info(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")
self.dec_layers.append(
nn.Sequential(
la.Conv2dWNUB(
n_in,
n_out,
kernel_size=3,
height=size,
width=size,
stride=1,
padding=1,
),
nn.LeakyReLU(self.lrelu_slope, inplace=True),
)
)
self.apply(weights_initializer(self.lrelu_slope))
self.shadow_pred = la.Conv2dWNUB(
self.n_dec_dims[-1][-1],
1,
kernel_size=3,
height=self.sizes[0],
width=self.sizes[0],
stride=1,
padding=1,
)
self.shadow_pred.apply(weights_initializer(1.0))
self.beta = beta
def forward(self, ao_map, pose_vec):
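        """Predict a shadow map from an AO map and a pose vector.

        Args:
            ao_map: `[B, 1, H, W]` AO map at the same spatial size as `ao_mean`.
            pose_vec: `[B, n_pose_dims]` pose vector (root orientation only).

        Returns:
            Dict with `shadow_map` (`[B, 1, uv_size, uv_size]`) and the input `ao_map`.
        """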
x = ao_map - self.ao_mean
x = F.interpolate(x, size=(self.shadow_size, self.shadow_size))
enc_acts = []
# unet enc
for i, layer in enumerate(self.enc_layers):
# TODO: try applying a 1D sparse op?
x = layer(x)
enc_acts.append(x)
# TODO: add this layer elsewhere?
if i < len(self.sizes) - 1:
x = F.interpolate(
x,
scale_factor=0.5,
mode="bilinear",
recompute_scale_factor=True,
align_corners=True,
)
pose_enc = self.pose_conv_block(tile2d(pose_vec, self.sizes[-1]))
        # concatenate the pose encoding with the deepest encoder features before decoding
x = th.cat([x, pose_enc], dim=1)
for i, layer in enumerate(self.dec_layers):
if i > 0:
x_prev = enc_acts[-i - 1]
x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
x = th.cat([x, x_prev], dim=1)
x = layer(x)
shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
shadow_map = F.interpolate(
shadow_map_lowres, (self.uv_size, self.uv_size), mode=self.interp_mode
)
return {"shadow_map": shadow_map, "ao_map": ao_map}
class PoseToShadow(nn.Module):
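    """Predicts a shadow map directly from a pose vector, without an AO map.

    A fully connected layer maps the pose to a 4x4 feature grid, which is
    upsampled by transposed convolutions to 128x128 and then bilinearly
    resized to `uv_size`.
    """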
def __init__(
self,
n_pose_dims,
uv_size,
beta=1.0,
) -> None:
super().__init__()
self.n_pose_dims = n_pose_dims
self.uv_size = uv_size
self.fc_block = nn.Sequential(
la.LinearWN(self.n_pose_dims, 256 * 4 * 4),
nn.LeakyReLU(0.2),
)
self.conv_block = nn.Sequential(
la.ConvTranspose2dWNUB(256, 256, 8, 8, 4, 2, 1),
nn.LeakyReLU(0.2),
la.ConvTranspose2dWNUB(256, 128, 16, 16, 4, 2, 1),
nn.LeakyReLU(0.2),
la.ConvTranspose2dWNUB(128, 128, 32, 32, 4, 2, 1),
nn.LeakyReLU(0.2),
la.ConvTranspose2dWNUB(128, 64, 64, 64, 4, 2, 1),
nn.LeakyReLU(0.2),
# la.ConvTranspose2dWNUB(64, 64, 128, 128, 4, 2, 1),
# nn.LeakyReLU(0.2),
# la.ConvTranspose2dWNUB(64, 1, 256, 256, 4, 2, 1),
la.ConvTranspose2dWNUB(64, 1, 128, 128, 4, 2, 1),
)
self.beta = beta
self.apply(lambda x: la.glorot(x, 0.2))
la.glorot(self.conv_block[-1], 1.0)
def forward(self, pose: th.Tensor):
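        """Predict a shadow map from a `[B, n_pose_dims]` pose vector."""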
        assert pose.shape[-1] == self.n_pose_dims, "unexpected pose dimensionality"
x = self.fc_block(pose)
x = self.conv_block(x.reshape(-1, 256, 4, 4))
shadow_map_lowres = th.sigmoid(x + self.beta)
shadow_map = F.interpolate(
shadow_map_lowres, size=(self.uv_size, self.uv_size), mode="bilinear"
)
return {"shadow_map": shadow_map}
class DistMapShadowUNet(nn.Module):
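    """ShadowUNet variant that consumes per-joint distance maps instead of an AO map.

    The first encoder layer takes `n_dist_joints` input channels; otherwise the
    architecture mirrors `ShadowUNet`, without mean subtraction.
    """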
def __init__(
self,
uv_size,
shadow_size,
n_dist_joints,
lrelu_slope=0.2,
beta=1.0,
n_dims=64,
interp_mode="bilinear",
biases=True,
):
super().__init__()
# this is the size of the output
self.uv_size = uv_size
self.shadow_size = shadow_size
self.depth = 3
self.lrelu_slope = lrelu_slope
self.interp_mode = interp_mode
self.align_corners = None
if interp_mode == "bilinear":
self.align_corners = False
        # n_dims is the base number of channels for the shadow feature maps
# TODO: generate this?
self.n_enc_dims = [
(n_dist_joints, n_dims),
(n_dims, n_dims),
(n_dims, n_dims),
(n_dims, n_dims),
]
self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
logger.debug(f"sizes: {self.sizes}")
self.enc_layers = nn.ModuleList()
for i, size in enumerate(self.sizes):
n_in, n_out = self.n_enc_dims[i]
logger.debug(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
self.enc_layers.append(
nn.Sequential(
la.Conv2dWNUB(
n_in,
n_out,
kernel_size=3,
height=size,
width=size,
stride=1,
padding=1,
),
nn.LeakyReLU(self.lrelu_slope, inplace=True),
)
)
self.n_dec_dims = [
(n_dims, n_dims),
(n_dims * 2, n_dims),
(n_dims * 2, n_dims),
(n_dims * 2, n_dims),
]
self.dec_layers = nn.ModuleList()
for i in range(len(self.sizes)):
size = self.sizes[-i - 1]
n_in, n_out = self.n_dec_dims[i]
logger.debug(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")
self.dec_layers.append(
nn.Sequential(
la.Conv2dWNUB(
n_in,
n_out,
kernel_size=3,
height=size,
width=size,
stride=1,
padding=1,
),
nn.LeakyReLU(self.lrelu_slope, inplace=True),
)
)
self.apply(weights_initializer(self.lrelu_slope))
if biases:
self.shadow_pred = la.Conv2dWNUB(
self.n_dec_dims[-1][-1],
1,
kernel_size=3,
height=self.sizes[0],
width=self.sizes[0],
stride=1,
padding=1,
)
else:
self.shadow_pred = la.Conv2dWN(
self.n_dec_dims[-1][-1],
1,
kernel_size=3,
stride=1,
padding=1,
)
self.shadow_pred.apply(weights_initializer(1.0))
self.beta = beta
def forward(self, dist_map: th.Tensor) -> Dict[str, th.Tensor]:
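        """Predict a shadow map from a stack of per-joint distance maps.

        Args:
            dist_map: `[B, n_dist_joints, H, W]`; resized to `shadow_size` if needed.

        Returns:
            Dict with `shadow_map` (`[B, 1, uv_size, uv_size]`) and
            `shadow_map_lowres` at `shadow_size` resolution.
        """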
# resizing the inputs if necessary
if dist_map.shape[-2:] != (self.shadow_size, self.shadow_size):
dist_map = F.interpolate(dist_map, size=(self.shadow_size, self.shadow_size))
x = dist_map
enc_acts = []
# unet enc
for i, layer in enumerate(self.enc_layers):
# TODO: try applying a 1D sparse op?
x = layer(x)
enc_acts.append(x)
# TODO: add this layer elsewhere?
if i < len(self.sizes) - 1:
x = F.interpolate(
x,
scale_factor=0.5,
mode="bilinear",
recompute_scale_factor=True,
align_corners=True,
)
        # decoder with skip connections; the deepest encoder activation is already in x, so it is not re-concatenated
for i, layer in enumerate(self.dec_layers):
if i > 0:
x_prev = enc_acts[-i - 1]
x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
x = th.cat([x, x_prev], dim=1)
x = layer(x)
shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
shadow_map = F.interpolate(
shadow_map_lowres,
(self.uv_size, self.uv_size),
mode=self.interp_mode,
align_corners=self.align_corners,
)
return {
"shadow_map": shadow_map,
"shadow_map_lowres": shadow_map_lowres,
}
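# Minimal shape-check sketch: the zero AO mean and the sizes below are placeholder
# assumptions for illustration, not values from any released model or checkpoint.
if __name__ == "__main__":
    _ao_mean = np.zeros((1, 512, 512), dtype=np.float32)  # placeholder AO mean
    _net = ShadowUNet(uv_size=1024, ao_mean=_ao_mean, shadow_size=512)
    _out = _net(th.rand(2, 1, 512, 512))
    print(_out["shadow_map"].shape)  # expected: torch.Size([2, 1, 1024, 1024])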