""" | |
Copyright (c) Meta Platforms, Inc. and affiliates. | |
All rights reserved. | |
This source code is licensed under the license found in the | |
LICENSE file in the root directory of this source tree. | |
""" | |
import logging | |
from typing import Optional, Dict | |
import numpy as np | |
import torch as th | |
import torch.nn as nn | |
import torch.nn.functional as F | |
# TODO: use shared utils here? | |
import visualize.ca_body.nn.layers as la | |
from visualize.ca_body.nn.blocks import tile2d, weights_initializer | |
logger = logging.getLogger(__name__) | |
class ShadowUNet(nn.Module): | |
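    """UNet that predicts a shadow map from an ambient-occlusion (AO) map.

    The AO map is resized to `shadow_size`, mean-centered, passed through a small
    encoder-decoder with skip connections, and the low-resolution prediction is
    upsampled to `uv_size`. The output is squashed to [0, 1] with a sigmoid
    shifted by `beta`.
    """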
    def __init__(
        self,
        uv_size,
        ao_mean,
        shadow_size,
        lrelu_slope=0.2,
        beta=1.0,
        n_dims=64,
        interp_mode="bilinear",
        biases=True,
        trainable_mean=False,
    ):
        super().__init__()

        # this is the size of the output
        self.uv_size = uv_size
        self.shadow_size = shadow_size

        ao_mean = F.interpolate(
            th.as_tensor(ao_mean)[np.newaxis],
            size=(self.shadow_size, self.shadow_size),
        )[0]
        if not trainable_mean:
            self.register_buffer("ao_mean", ao_mean)
        else:
            self.register_parameter("ao_mean", th.nn.Parameter(ao_mean))

        self.depth = 3
        self.lrelu_slope = lrelu_slope
        self.interp_mode = interp_mode
        self.align_corners = None
        if interp_mode == "bilinear":
            self.align_corners = False

        # the base number of dimensions for the shadow maps
        # TODO: generate this?
        self.n_enc_dims = [
            (1, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
        ]

        self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
        logger.debug(f"sizes: {self.sizes}")

        self.enc_layers = nn.ModuleList()
        for i, size in enumerate(self.sizes):
            n_in, n_out = self.n_enc_dims[i]
            logger.debug(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
            self.enc_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.n_dec_dims = [
            (n_dims, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
        ]
        self.dec_layers = nn.ModuleList()
        for i in range(len(self.sizes)):
            size = self.sizes[-i - 1]
            n_in, n_out = self.n_dec_dims[i]
            logger.debug(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")
            self.dec_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.apply(weights_initializer(self.lrelu_slope))

        if biases:
            self.shadow_pred = la.Conv2dWNUB(
                self.n_dec_dims[-1][-1],
                1,
                kernel_size=3,
                height=self.sizes[0],
                width=self.sizes[0],
                stride=1,
                padding=1,
            )
        else:
            self.shadow_pred = la.Conv2dWN(
                self.n_dec_dims[-1][-1],
                1,
                kernel_size=3,
                stride=1,
                padding=1,
            )
        self.shadow_pred.apply(weights_initializer(1.0))
        self.beta = beta

    def forward(self, ao_map):
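        """Predict a shadow map from a `[B, 1, H, W]` AO map.

        Returns a dict with the upsampled `shadow_map`, the (possibly resized)
        `ao_map`, and the low-resolution `shadow_map_lowres`.
        """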
        # resizing the inputs if necessary
        if ao_map.shape[-2:] != (self.shadow_size, self.shadow_size):
            ao_map = F.interpolate(ao_map, size=(self.shadow_size, self.shadow_size))

        x = ao_map - self.ao_mean

        enc_acts = []
        # unet enc
        for i, layer in enumerate(self.enc_layers):
            # TODO: try applying a 1D sparse op?
            x = layer(x)
            enc_acts.append(x)
            # TODO: add this layer elsewhere?
            if i < len(self.sizes) - 1:
                x = F.interpolate(
                    x,
                    scale_factor=0.5,
                    mode="bilinear",
                    recompute_scale_factor=True,
                    align_corners=True,
                )

        # unet dec; the deepest encoder activation is already the decoder input,
        # so it is not reused as a skip connection
        for i, layer in enumerate(self.dec_layers):
            if i > 0:
                x_prev = enc_acts[-i - 1]
                x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
                x = th.cat([x, x_prev], dim=1)
            x = layer(x)

        shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
        shadow_map = F.interpolate(
            shadow_map_lowres,
            (self.uv_size, self.uv_size),
            mode=self.interp_mode,
            align_corners=self.align_corners,
        )

        return {
            "shadow_map": shadow_map,
            "ao_map": ao_map,
            "shadow_map_lowres": shadow_map_lowres,
        }


class FloorShadowDecoder(nn.Module):
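    """Encoder-decoder with additive skip connections that turns an AO map into a floor shadow map."""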
    def __init__(
        self,
        uv_size,
        beta=1.0,
    ):
        super().__init__()
        # TODO: can we reduce # dims here?
        self.down1 = nn.Sequential(la.Conv2dWNUB(1, 64, 256, 256, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down2 = nn.Sequential(la.Conv2dWNUB(64, 64, 128, 128, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down3 = nn.Sequential(la.Conv2dWNUB(64, 128, 64, 64, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down4 = nn.Sequential(la.Conv2dWNUB(128, 256, 32, 32, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down5 = nn.Sequential(la.Conv2dWNUB(256, 512, 16, 16, 4, 2, 1), nn.LeakyReLU(0.2))
        self.up1 = nn.Sequential(
            la.ConvTranspose2dWNUB(512, 256, 32, 32, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up2 = nn.Sequential(
            la.ConvTranspose2dWNUB(256, 128, 64, 64, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up3 = nn.Sequential(
            la.ConvTranspose2dWNUB(128, 64, 128, 128, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up4 = nn.Sequential(
            la.ConvTranspose2dWNUB(64, 64, 256, 256, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up5 = nn.Sequential(la.ConvTranspose2dWNUB(64, 1, 512, 512, 4, 2, 1))

        self.uv_size = uv_size

        self.apply(lambda x: la.glorot(x, 0.2))
        la.glorot(self.up5, 1.0)

        self.beta = beta

    def forward(self, aomap: th.Tensor):
        aomap = F.interpolate(
            aomap,
            size=(self.uv_size, self.uv_size),
            mode="bilinear",
            align_corners=True,
        )
        x2 = self.down1(aomap - 0.5)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x6 = self.down5(x5)
        x = self.up1(x6) + x5
        x = self.up2(x) + x4
        x = self.up3(x) + x3
        x = self.up4(x) + x2
        # residual prediction on top of the AO map, squashed to [0, 1]
        shadow_map = (th.tanh(self.up5(x) + aomap) + 1.0) / 2.0
        return {"shadow_map": shadow_map}


class ShadowUNet_PoseCond(nn.Module):
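    """ShadowUNet variant whose bottleneck is additionally conditioned on a (root) pose vector."""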
    def __init__(
        self,
        uv_size,
        ao_mean,
        shadow_size,
        # uv_coords, # for bottleneck
        # uv_mapping, # for bottleneck
        # uv_faces, # for bottleneck
        lrelu_slope=0.2,
        beta=1.0,
        n_dims=64,
        n_pose_dims=6,  # root orientation only
        n_pose_enc_dims=32,
        interp_mode="bilinear",
    ):
        super().__init__()

        self.uv_size = uv_size

        self.register_buffer("ao_mean", th.as_tensor(ao_mean))
        # self.register_buffer("uv_coords", th.as_tensor(uv_coords))
        # self.register_buffer("uv_mapping", th.as_tensor(uv_mapping))
        # self.register_buffer("uv_faces", th.as_tensor(uv_faces))

        self.depth = 3
        self.lrelu_slope = lrelu_slope
        self.interp_mode = interp_mode

        # the base number of dimensions for the shadow maps
        # TODO: generate this?
        self.n_enc_dims = [
            (1, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
        ]

        self.shadow_size = shadow_size
        self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
        logger.info(f"shadow map size: {self.shadow_size}")
        # logger.info(f"sizes: {self.sizes}")

        # FC for root pose encoding
        self.num_pose_dims = n_pose_dims
        self.num_pose_enc_dims = n_pose_enc_dims
        self.pose_fc_block = nn.Sequential(
            la.LinearWN(self.num_pose_dims, self.num_pose_enc_dims),
            nn.LeakyReLU(lrelu_slope),
        )
        self.pose_conv_block = la.Conv2dWNUB(
            in_channels=self.num_pose_dims,
            out_channels=self.num_pose_enc_dims,
            kernel_size=3,
            height=self.sizes[-1],
            width=self.sizes[-1],
            padding=1,
        )

        self.enc_layers = nn.ModuleList()
        for i, size in enumerate(self.sizes):
            n_in, n_out = self.n_enc_dims[i]
            # logger.info(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
            self.enc_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.n_dec_dims = [
            (n_dims + self.num_pose_enc_dims, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
        ]
        self.dec_layers = nn.ModuleList()
        for i in range(len(self.sizes)):
            size = self.sizes[-i - 1]
            n_in, n_out = self.n_dec_dims[i]
            # logger.info(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")
            self.dec_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.apply(weights_initializer(self.lrelu_slope))

        self.shadow_pred = la.Conv2dWNUB(
            self.n_dec_dims[-1][-1],
            1,
            kernel_size=3,
            height=self.sizes[0],
            width=self.sizes[0],
            stride=1,
            padding=1,
        )
        self.shadow_pred.apply(weights_initializer(1.0))
        self.beta = beta
    def forward(self, ao_map, pose_vec):
        x = ao_map - self.ao_mean
        x = F.interpolate(x, size=(self.shadow_size, self.shadow_size))

        enc_acts = []
        # unet enc
        for i, layer in enumerate(self.enc_layers):
            # TODO: try applying a 1D sparse op?
            x = layer(x)
            enc_acts.append(x)
            # TODO: add this layer elsewhere?
            if i < len(self.sizes) - 1:
                x = F.interpolate(
                    x,
                    scale_factor=0.5,
                    mode="bilinear",
                    recompute_scale_factor=True,
                    align_corners=True,
                )

        # condition the bottleneck on the (tiled) pose vector
        pose_enc = self.pose_conv_block(tile2d(pose_vec, self.sizes[-1]))
        x = th.cat([x, pose_enc], dim=1)

        # unet dec
        for i, layer in enumerate(self.dec_layers):
            if i > 0:
                x_prev = enc_acts[-i - 1]
                x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
                x = th.cat([x, x_prev], dim=1)
            x = layer(x)

        shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
        shadow_map = F.interpolate(
            shadow_map_lowres, (self.uv_size, self.uv_size), mode=self.interp_mode
        )

        return {"shadow_map": shadow_map, "ao_map": ao_map}


class PoseToShadow(nn.Module):
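    """Decodes a pose vector directly into a shadow map, without an AO-map input."""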
    def __init__(
        self,
        n_pose_dims,
        uv_size,
        beta=1.0,
    ) -> None:
        super().__init__()
        self.n_pose_dims = n_pose_dims
        self.uv_size = uv_size

        self.fc_block = nn.Sequential(
            la.LinearWN(self.n_pose_dims, 256 * 4 * 4),
            nn.LeakyReLU(0.2),
        )
        self.conv_block = nn.Sequential(
            la.ConvTranspose2dWNUB(256, 256, 8, 8, 4, 2, 1),
            nn.LeakyReLU(0.2),
            la.ConvTranspose2dWNUB(256, 128, 16, 16, 4, 2, 1),
            nn.LeakyReLU(0.2),
            la.ConvTranspose2dWNUB(128, 128, 32, 32, 4, 2, 1),
            nn.LeakyReLU(0.2),
            la.ConvTranspose2dWNUB(128, 64, 64, 64, 4, 2, 1),
            nn.LeakyReLU(0.2),
            # la.ConvTranspose2dWNUB(64, 64, 128, 128, 4, 2, 1),
            # nn.LeakyReLU(0.2),
            # la.ConvTranspose2dWNUB(64, 1, 256, 256, 4, 2, 1),
            la.ConvTranspose2dWNUB(64, 1, 128, 128, 4, 2, 1),
        )
        self.beta = beta
        self.apply(lambda x: la.glorot(x, 0.2))
        la.glorot(self.conv_block[-1], 1.0)
    def forward(self, pose: th.Tensor):
        assert pose.shape[-1] == self.n_pose_dims
        x = self.fc_block(pose)
        x = self.conv_block(x.reshape(-1, 256, 4, 4))
        shadow_map_lowres = th.sigmoid(x + self.beta)
        shadow_map = F.interpolate(
            shadow_map_lowres, size=(self.uv_size, self.uv_size), mode="bilinear"
        )
        return {"shadow_map": shadow_map}


class DistMapShadowUNet(nn.Module):
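    """ShadowUNet variant that takes per-joint distance maps (`n_dist_joints` channels) instead of an AO map."""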
    def __init__(
        self,
        uv_size,
        shadow_size,
        n_dist_joints,
        lrelu_slope=0.2,
        beta=1.0,
        n_dims=64,
        interp_mode="bilinear",
        biases=True,
    ):
        super().__init__()

        # this is the size of the output
        self.uv_size = uv_size
        self.shadow_size = shadow_size

        self.depth = 3
        self.lrelu_slope = lrelu_slope
        self.interp_mode = interp_mode
        self.align_corners = None
        if interp_mode == "bilinear":
            self.align_corners = False

        # the base number of dimensions for the shadow maps
        # TODO: generate this?
        self.n_enc_dims = [
            (n_dist_joints, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
        ]

        self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
        logger.debug(f"sizes: {self.sizes}")

        self.enc_layers = nn.ModuleList()
        for i, size in enumerate(self.sizes):
            n_in, n_out = self.n_enc_dims[i]
            logger.debug(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
            self.enc_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.n_dec_dims = [
            (n_dims, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
        ]
        self.dec_layers = nn.ModuleList()
        for i in range(len(self.sizes)):
            size = self.sizes[-i - 1]
            n_in, n_out = self.n_dec_dims[i]
            logger.debug(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")
            self.dec_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.apply(weights_initializer(self.lrelu_slope))

        if biases:
            self.shadow_pred = la.Conv2dWNUB(
                self.n_dec_dims[-1][-1],
                1,
                kernel_size=3,
                height=self.sizes[0],
                width=self.sizes[0],
                stride=1,
                padding=1,
            )
        else:
            self.shadow_pred = la.Conv2dWN(
                self.n_dec_dims[-1][-1],
                1,
                kernel_size=3,
                stride=1,
                padding=1,
            )
        self.shadow_pred.apply(weights_initializer(1.0))
        self.beta = beta
    def forward(self, dist_map: th.Tensor) -> Dict[str, th.Tensor]:
        # resizing the inputs if necessary
        if dist_map.shape[-2:] != (self.shadow_size, self.shadow_size):
            dist_map = F.interpolate(dist_map, size=(self.shadow_size, self.shadow_size))

        x = dist_map

        enc_acts = []
        # unet enc
        for i, layer in enumerate(self.enc_layers):
            # TODO: try applying a 1D sparse op?
            x = layer(x)
            enc_acts.append(x)
            # TODO: add this layer elsewhere?
            if i < len(self.sizes) - 1:
                x = F.interpolate(
                    x,
                    scale_factor=0.5,
                    mode="bilinear",
                    recompute_scale_factor=True,
                    align_corners=True,
                )

        # unet dec; the deepest encoder activation is already the decoder input,
        # so it is not reused as a skip connection
        for i, layer in enumerate(self.dec_layers):
            if i > 0:
                x_prev = enc_acts[-i - 1]
                x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
                x = th.cat([x, x_prev], dim=1)
            x = layer(x)

        shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
        shadow_map = F.interpolate(
            shadow_map_lowres,
            (self.uv_size, self.uv_size),
            mode=self.interp_mode,
            align_corners=self.align_corners,
        )

        return {
            "shadow_map": shadow_map,
            "shadow_map_lowres": shadow_map_lowres,
        }
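

# A minimal smoke-test sketch (not part of the original code): the sizes and the
# zero AO mean below are illustrative assumptions, chosen only to exercise the
# ShadowUNet shapes end to end.
if __name__ == "__main__":
    uv_size, shadow_size = 512, 256
    # assumed placeholder for the dataset's mean AO map, shaped [1, H, W]
    ao_mean = np.zeros((1, uv_size, uv_size), dtype=np.float32)
    net = ShadowUNet(uv_size=uv_size, ao_mean=ao_mean, shadow_size=shadow_size)
    ao_map = th.rand(2, 1, uv_size, uv_size)  # batch of two AO maps
    out = net(ao_map)
    # expected: shadow_map -> (2, 1, 512, 512), shadow_map_lowres -> (2, 1, 256, 256)
    print({k: tuple(v.shape) for k, v in out.items()})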