""" Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the license found in the LICENSE file in the root directory of this source tree. """ import logging from typing import Optional, Dict import numpy as np import torch as th import torch.nn as nn import torch.nn.functional as F # TODO: use shared utils here? import visualize.ca_body.nn.layers as la from visualize.ca_body.nn.blocks import tile2d, weights_initializer logger = logging.getLogger(__name__) class ShadowUNet(nn.Module): def __init__( self, uv_size, ao_mean, shadow_size, lrelu_slope=0.2, beta=1.0, n_dims=64, interp_mode="bilinear", biases=True, trainable_mean=False, ): super().__init__() # this is the size of the output self.uv_size = uv_size self.shadow_size = shadow_size ao_mean = F.interpolate( th.as_tensor(ao_mean)[np.newaxis], size=(self.shadow_size, self.shadow_size), )[0] if not trainable_mean: # TODO: self.register_buffer("ao_mean", ao_mean) else: self.register_parameter("ao_mean", th.nn.Parameter(ao_mean)) self.depth = 3 self.lrelu_slope = lrelu_slope self.interp_mode = interp_mode self.align_corners = None if interp_mode == "bilinear": self.align_corners = False # the base number of dimensions for the shadow maps n_dims = n_dims # TODO: generate this? self.n_enc_dims = [ (1, n_dims), (n_dims, n_dims), (n_dims, n_dims), (n_dims, n_dims), ] self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))] logger.debug(f"sizes: {self.sizes}") self.enc_layers = nn.ModuleList() for i, size in enumerate(self.sizes): n_in, n_out = self.n_enc_dims[i] logger.debug(f"EncoderLayers({i}): {n_in}, {n_out}, {size}") self.enc_layers.append( nn.Sequential( la.Conv2dWNUB( n_in, n_out, kernel_size=3, height=size, width=size, stride=1, padding=1, ), nn.LeakyReLU(self.lrelu_slope, inplace=True), ) ) self.n_dec_dims = [ (n_dims, n_dims), (n_dims * 2, n_dims), (n_dims * 2, n_dims), (n_dims * 2, n_dims), ] self.dec_layers = nn.ModuleList() for i in range(len(self.sizes)): size = self.sizes[-i - 1] n_in, n_out = self.n_dec_dims[i] logger.debug(f"DecoderLayer({i}): {n_in}, {n_out}, {size}") self.dec_layers.append( nn.Sequential( la.Conv2dWNUB( n_in, n_out, kernel_size=3, height=size, width=size, stride=1, padding=1, ), nn.LeakyReLU(self.lrelu_slope, inplace=True), ) ) self.apply(weights_initializer(self.lrelu_slope)) if biases: self.shadow_pred = la.Conv2dWNUB( self.n_dec_dims[-1][-1], 1, kernel_size=3, height=self.sizes[0], width=self.sizes[0], stride=1, padding=1, ) else: self.shadow_pred = la.Conv2dWN( self.n_dec_dims[-1][-1], 1, kernel_size=3, stride=1, padding=1, ) self.shadow_pred.apply(weights_initializer(1.0)) self.beta = beta def forward(self, ao_map): # resizing the inputs if necessary if ao_map.shape[-2:] != (self.shadow_size, self.shadow_size): ao_map = F.interpolate(ao_map, size=(self.shadow_size, self.shadow_size)) x = ao_map - self.ao_mean enc_acts = [] # unet enc for i, layer in enumerate(self.enc_layers): # TODO: try applying a 1D sparse op? x = layer(x) enc_acts.append(x) # TODO: add this layer elsewhere? if i < len(self.sizes) - 1: x = F.interpolate( x, scale_factor=0.5, mode="bilinear", recompute_scale_factor=True, align_corners=True, ) # we do not need the last one? 
class FloorShadowDecoder(nn.Module):
    def __init__(
        self,
        uv_size,
        beta=1.0,
    ):
        super().__init__()
        # TODO: can we reduce # dims here?
        self.down1 = nn.Sequential(la.Conv2dWNUB(1, 64, 256, 256, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down2 = nn.Sequential(la.Conv2dWNUB(64, 64, 128, 128, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down3 = nn.Sequential(la.Conv2dWNUB(64, 128, 64, 64, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down4 = nn.Sequential(la.Conv2dWNUB(128, 256, 32, 32, 4, 2, 1), nn.LeakyReLU(0.2))
        self.down5 = nn.Sequential(la.Conv2dWNUB(256, 512, 16, 16, 4, 2, 1), nn.LeakyReLU(0.2))
        self.up1 = nn.Sequential(
            la.ConvTranspose2dWNUB(512, 256, 32, 32, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up2 = nn.Sequential(
            la.ConvTranspose2dWNUB(256, 128, 64, 64, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up3 = nn.Sequential(
            la.ConvTranspose2dWNUB(128, 64, 128, 128, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up4 = nn.Sequential(
            la.ConvTranspose2dWNUB(64, 64, 256, 256, 4, 2, 1), nn.LeakyReLU(0.2)
        )
        self.up5 = nn.Sequential(la.ConvTranspose2dWNUB(64, 1, 512, 512, 4, 2, 1))

        self.uv_size = uv_size

        self.apply(lambda x: la.glorot(x, 0.2))
        la.glorot(self.up5, 1.0)

        self.beta = beta

    def forward(self, aomap: th.Tensor):
        aomap = F.interpolate(
            aomap,
            size=(self.uv_size, self.uv_size),
            mode="bilinear",
            align_corners=True,
        )
        x2 = self.down1(aomap - 0.5)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x6 = self.down5(x5)

        x = self.up1(x6) + x5
        x = self.up2(x) + x4
        x = self.up3(x) + x3
        x = self.up4(x) + x2

        logits = (th.tanh(self.up5(x) + aomap) + 1.0) / 2.0

        return {"shadow_map": logits}


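# Minimal usage sketch for FloorShadowDecoder (illustrative only, not part of the
# original API). Note that the hard-coded layer resolutions above imply uv_size=512
# here: the input AO map is resized to uv_size before the first 256x256 untied-bias
# conv, and the final skip connection adds the 512x512 output back to it.
def _example_floor_shadow_decoder() -> Dict[str, th.Tensor]:
    net = FloorShadowDecoder(uv_size=512)
    aomap = th.rand(1, 1, 512, 512)  # floor ambient-occlusion map
    preds = net(aomap)
    # preds["shadow_map"]: (1, 1, 512, 512), values in (0, 1)
    return preds

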
class ShadowUNet_PoseCond(nn.Module):
    def __init__(
        self,
        uv_size,
        ao_mean,
        shadow_size,
        # uv_coords,  # for bottleneck
        # uv_mapping,  # for bottleneck
        # uv_faces,  # for bottleneck
        lrelu_slope=0.2,
        beta=1.0,
        n_dims=64,
        n_pose_dims=6,  # root orientation only
        n_pose_enc_dims=32,
        interp_mode="bilinear",
    ):
        super().__init__()

        self.uv_size = uv_size

        self.register_buffer("ao_mean", th.as_tensor(ao_mean))
        # self.register_buffer("uv_coords", th.as_tensor(uv_coords))
        # self.register_buffer("uv_mapping", th.as_tensor(uv_mapping))
        # self.register_buffer("uv_faces", th.as_tensor(uv_faces))
        self.depth = 3
        self.lrelu_slope = lrelu_slope
        self.interp_mode = interp_mode
        self.uv_size = uv_size

        # the base number of dimensions for the shadow maps
        n_dims = n_dims

        # TODO: generate this?
        self.n_enc_dims = [
            (1, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
        ]

        self.shadow_size = shadow_size
        self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]
        logger.info(f"shadow map size: {self.shadow_size}")
        # logger.info(f"sizes: {self.sizes}")

        #####
        ## FC for root pose encoding
        self.num_pose_dims = n_pose_dims
        self.num_pose_enc_dims = n_pose_enc_dims
        self.pose_fc_block = nn.Sequential(
            la.LinearWN(self.num_pose_dims, self.num_pose_enc_dims),
            nn.LeakyReLU(lrelu_slope),
        )
        self.pose_conv_block = la.Conv2dWNUB(
            in_channels=self.num_pose_dims,
            out_channels=self.num_pose_enc_dims,
            kernel_size=3,
            height=self.sizes[-1],
            width=self.sizes[-1],
            padding=1,
        )

        self.enc_layers = nn.ModuleList()
        for i, size in enumerate(self.sizes):
            n_in, n_out = self.n_enc_dims[i]
            # logger.info(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
            self.enc_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.n_dec_dims = [
            (n_dims + self.num_pose_enc_dims, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
        ]
        self.dec_layers = nn.ModuleList()
        for i in range(len(self.sizes)):
            size = self.sizes[-i - 1]
            n_in, n_out = self.n_dec_dims[i]
            # logger.info(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")

            self.dec_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.apply(weights_initializer(self.lrelu_slope))

        self.shadow_pred = la.Conv2dWNUB(
            self.n_dec_dims[-1][-1],
            1,
            kernel_size=3,
            height=self.sizes[0],
            width=self.sizes[0],
            stride=1,
            padding=1,
        )

        self.shadow_pred.apply(weights_initializer(1.0))
        self.beta = beta

    def forward(self, ao_map, pose_vec):
        x = ao_map - self.ao_mean
        x = F.interpolate(x, size=(self.shadow_size, self.shadow_size))

        enc_acts = []
        # unet enc
        for i, layer in enumerate(self.enc_layers):
            # TODO: try applying a 1D sparse op?
            x = layer(x)
            enc_acts.append(x)
            # TODO: add this layer elsewhere?
            if i < len(self.sizes) - 1:
                x = F.interpolate(
                    x,
                    scale_factor=0.5,
                    mode="bilinear",
                    recompute_scale_factor=True,
                    align_corners=True,
                )

        pose_enc = self.pose_conv_block(tile2d(pose_vec, self.sizes[-1]))

        # we do not need the last one?
        x = th.cat([x, pose_enc], dim=1)
        for i, layer in enumerate(self.dec_layers):
            if i > 0:
                x_prev = enc_acts[-i - 1]
                x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
                x = th.cat([x, x_prev], dim=1)
            x = layer(x)

        shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
        shadow_map = F.interpolate(
            shadow_map_lowres, (self.uv_size, self.uv_size), mode=self.interp_mode
        )

        return {"shadow_map": shadow_map, "ao_map": ao_map}


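# Minimal usage sketch for ShadowUNet_PoseCond (illustrative only, not part of the
# original API; the resolutions and the 6-dim root pose are assumptions). Unlike
# ShadowUNet, ao_mean is not resized at construction time, so it has to match the
# resolution of the incoming AO maps.
def _example_shadow_unet_pose_cond() -> Dict[str, th.Tensor]:
    ao_mean = th.zeros(1, 512, 512)
    net = ShadowUNet_PoseCond(uv_size=512, ao_mean=ao_mean, shadow_size=256)
    ao_map = th.rand(1, 1, 512, 512)
    pose_vec = th.rand(1, 6)  # root orientation (n_pose_dims=6 by default)
    preds = net(ao_map, pose_vec)
    # preds["shadow_map"]: (1, 1, 512, 512)
    return preds

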
class PoseToShadow(nn.Module):
    def __init__(
        self,
        n_pose_dims,
        uv_size,
        beta=1.0,
    ) -> None:
        super().__init__()
        self.n_pose_dims = n_pose_dims
        self.uv_size = uv_size

        self.fc_block = nn.Sequential(
            la.LinearWN(self.n_pose_dims, 256 * 4 * 4),
            nn.LeakyReLU(0.2),
        )
        self.conv_block = nn.Sequential(
            la.ConvTranspose2dWNUB(256, 256, 8, 8, 4, 2, 1),
            nn.LeakyReLU(0.2),
            la.ConvTranspose2dWNUB(256, 128, 16, 16, 4, 2, 1),
            nn.LeakyReLU(0.2),
            la.ConvTranspose2dWNUB(128, 128, 32, 32, 4, 2, 1),
            nn.LeakyReLU(0.2),
            la.ConvTranspose2dWNUB(128, 64, 64, 64, 4, 2, 1),
            nn.LeakyReLU(0.2),
            # la.ConvTranspose2dWNUB(64, 64, 128, 128, 4, 2, 1),
            # nn.LeakyReLU(0.2),
            # la.ConvTranspose2dWNUB(64, 1, 256, 256, 4, 2, 1),
            la.ConvTranspose2dWNUB(64, 1, 128, 128, 4, 2, 1),
        )
        self.beta = beta

        self.apply(lambda x: la.glorot(x, 0.2))
        la.glorot(self.conv_block[-1], 1.0)

    def forward(self, pose: th.Tensor):
        # the pose vector must have the expected number of dimensions
        assert pose.shape[-1] == self.n_pose_dims
        x = self.fc_block(pose)
        x = self.conv_block(x.reshape(-1, 256, 4, 4))
        shadow_map_lowres = th.sigmoid(x + self.beta)

        shadow_map = F.interpolate(
            shadow_map_lowres, size=(self.uv_size, self.uv_size), mode="bilinear"
        )
        return {"shadow_map": shadow_map}


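# Minimal usage sketch for PoseToShadow (illustrative only, not part of the original
# API; n_pose_dims=6 and uv_size=512 are assumptions). The decoder upsamples a pose
# code from 4x4 to a 128x128 shadow map, which is then resized to uv_size.
def _example_pose_to_shadow() -> Dict[str, th.Tensor]:
    net = PoseToShadow(n_pose_dims=6, uv_size=512)
    pose = th.rand(2, 6)
    preds = net(pose)
    # preds["shadow_map"]: (2, 1, 512, 512)
    return preds

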
class DistMapShadowUNet(nn.Module):
    def __init__(
        self,
        uv_size,
        shadow_size,
        n_dist_joints,
        lrelu_slope=0.2,
        beta=1.0,
        n_dims=64,
        interp_mode="bilinear",
        biases=True,
    ):
        super().__init__()

        # this is the size of the output
        self.uv_size = uv_size
        self.shadow_size = shadow_size

        self.depth = 3
        self.lrelu_slope = lrelu_slope
        self.interp_mode = interp_mode
        self.align_corners = None
        if interp_mode == "bilinear":
            self.align_corners = False

        # the base number of dimensions for the shadow maps
        n_dims = n_dims

        # TODO: generate this?
        self.n_enc_dims = [
            (n_dist_joints, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
            (n_dims, n_dims),
        ]

        self.sizes = [shadow_size // (2**i) for i in range(len(self.n_enc_dims))]

        logger.debug(f"sizes: {self.sizes}")

        self.enc_layers = nn.ModuleList()
        for i, size in enumerate(self.sizes):
            n_in, n_out = self.n_enc_dims[i]
            logger.debug(f"EncoderLayers({i}): {n_in}, {n_out}, {size}")
            self.enc_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.n_dec_dims = [
            (n_dims, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
            (n_dims * 2, n_dims),
        ]
        self.dec_layers = nn.ModuleList()
        for i in range(len(self.sizes)):
            size = self.sizes[-i - 1]
            n_in, n_out = self.n_dec_dims[i]
            logger.debug(f"DecoderLayer({i}): {n_in}, {n_out}, {size}")

            self.dec_layers.append(
                nn.Sequential(
                    la.Conv2dWNUB(
                        n_in,
                        n_out,
                        kernel_size=3,
                        height=size,
                        width=size,
                        stride=1,
                        padding=1,
                    ),
                    nn.LeakyReLU(self.lrelu_slope, inplace=True),
                )
            )

        self.apply(weights_initializer(self.lrelu_slope))

        if biases:
            self.shadow_pred = la.Conv2dWNUB(
                self.n_dec_dims[-1][-1],
                1,
                kernel_size=3,
                height=self.sizes[0],
                width=self.sizes[0],
                stride=1,
                padding=1,
            )
        else:
            self.shadow_pred = la.Conv2dWN(
                self.n_dec_dims[-1][-1],
                1,
                kernel_size=3,
                stride=1,
                padding=1,
            )

        self.shadow_pred.apply(weights_initializer(1.0))
        self.beta = beta

    def forward(self, dist_map: th.Tensor) -> Dict[str, th.Tensor]:
        # resizing the inputs if necessary
        if dist_map.shape[-2:] != (self.shadow_size, self.shadow_size):
            dist_map = F.interpolate(dist_map, size=(self.shadow_size, self.shadow_size))

        x = dist_map

        enc_acts = []
        # unet enc
        for i, layer in enumerate(self.enc_layers):
            # TODO: try applying a 1D sparse op?
            x = layer(x)
            enc_acts.append(x)
            # TODO: add this layer elsewhere?
            if i < len(self.sizes) - 1:
                x = F.interpolate(
                    x,
                    scale_factor=0.5,
                    mode="bilinear",
                    recompute_scale_factor=True,
                    align_corners=True,
                )

        # we do not need the last one?
        for i, layer in enumerate(self.dec_layers):
            if i > 0:
                x_prev = enc_acts[-i - 1]
                x = F.interpolate(x, size=x_prev.shape[2:4], mode="bilinear", align_corners=True)
                x = th.cat([x, x_prev], dim=1)
            x = layer(x)

        shadow_map_lowres = th.sigmoid(self.shadow_pred(x) + self.beta)
        shadow_map = F.interpolate(
            shadow_map_lowres,
            (self.uv_size, self.uv_size),
            mode=self.interp_mode,
            align_corners=self.align_corners,
        )

        return {
            "shadow_map": shadow_map,
            "shadow_map_lowres": shadow_map_lowres,
        }


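# Minimal usage sketch for DistMapShadowUNet (illustrative only, not part of the
# original API; n_dist_joints=16 and the resolutions are assumptions, not values
# from the actual configs). The input is a stack of per-joint distance maps along
# the channel dimension.
def _example_dist_map_shadow_unet() -> Dict[str, th.Tensor]:
    net = DistMapShadowUNet(uv_size=512, shadow_size=256, n_dist_joints=16)
    dist_map = th.rand(1, 16, 256, 256)
    preds = net(dist_map)
    # preds["shadow_map"]: (1, 1, 512, 512)
    return preds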