Spaces:
Runtime error
Runtime error
File size: 9,500 Bytes
f670afc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
"""Utils for the pix2pixHD model."""
import numpy as np
import torch
from imaginaire.utils.data import get_paired_input_label_channel_number
from imaginaire.utils.distributed import dist_all_gather_tensor, is_master
from imaginaire.utils.distributed import master_only_print as print
from imaginaire.utils.trainer import (get_optimizer, get_optimizer_for_params,
wrap_model_and_optimizer)
from sklearn.cluster import KMeans
def cluster_features(cfg, train_data_loader, net_E,
                     preprocess=None, small_ratio=0.0625, is_cityscapes=True):
    r"""Cluster the encoded instance features of every label with K-means.

    The encoder is run over the whole training set, one representative
    feature vector is collected per instance, and for each label the
    K-means cluster centers are written into the ``cluster_<label>``
    attribute of ``net_E``. Collection and clustering only happen on the
    master process.

    Args:
        cfg (obj): Global configuration file.
        train_data_loader (obj): Dataloader for iterating through the
            training set.
        net_E (nn.Module): Pytorch network.
        preprocess (function): Pre-processing function.
        small_ratio (float): We only consider instances that occupy more
            than $(small_ratio) of the image space.
        is_cityscapes (bool): Is this the Cityscapes dataset? In the
            Cityscapes dataset, the instance labels for car start with 26001,
            26002, ...
    Returns:
        None. The cluster centers are stored in place in
        ``net_E.cluster_<label>[0:num_centers, :]``.
    """
    label_nc = get_paired_input_label_channel_number(cfg.data)
    feat_nc = cfg.gen.enc.num_feat_channels
    max_clusters = getattr(cfg.gen.enc, 'num_clusters', 10)

    # Compute features. Per-batch arrays are kept in lists and concatenated
    # once per label, instead of re-copying the growing array every batch.
    collected = {label: [] for label in range(label_nc)}
    for data in train_data_loader:
        if preprocess is not None:
            data = preprocess(data)
        feat = encode_features(net_E, feat_nc, label_nc,
                               data['images'], data['instance_maps'],
                               is_cityscapes)
        # We only collect the feature vectors for the master GPU.
        if is_master():
            for label in range(label_nc):
                collected[label].append(feat[label])

    # Clustering.
    # We only perform clustering for the master GPU.
    if not is_master():
        return

    for label in range(label_nc):
        if collected[label]:
            feat = np.concatenate(collected[label], axis=0)
        else:
            feat = np.zeros((0, feat_nc + 1))
        # We only consider segments that are greater than a pre-set
        # threshold. The last column holds the area ratio; drop it.
        feat = feat[feat[:, -1] > small_ratio, :-1]
        if not feat.shape[0]:
            continue
        # K-means cannot use more clusters than samples. The cap is
        # computed per label so that a label with few samples does not
        # reduce the number of clusters used for the following labels.
        n_clusters = min(feat.shape[0], max_clusters)
        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat)
        centers = kmeans.cluster_centers_
        this_cluster = getattr(net_E, 'cluster_%d' % label)
        this_cluster[0:centers.shape[0], :] = torch.Tensor(centers).float()
def encode_features(net_E, feat_nc, label_nc, image, inst,
                    is_cityscapes=True):
    r"""Compute feature embeddings for the instances in an image batch.

    TODO(Ting-Chun): To make this function dataset independent.

    Args:
        net_E (nn.Module): The encoder network.
        feat_nc (int): Feature dimensions
        label_nc (int): Number of segmentation labels.
        image (tensor): Input image tensor.
        inst (tensor): Input instance map.
        is_cityscapes (bool): Is this the Cityscapes dataset? In the
            Cityscapes dataset, the instance labels for car start with 26001,
            26002, ...
    Returns:
        (dict of numpy arrays): One entry per label in
        ``range(label_nc)``. Each entry is an array of shape
        (num_instances, $(feat_nc) + 1) where the first $(feat_nc) columns
        are the representative feature of an instance and the last column
        is the proportion of the map covered by that instance. On
        non-master processes every array is empty.
    """
    feat_map = net_E(image, inst)
    # Both gathers run on every process, before the master-only branch.
    feature_map_gather = dist_all_gather_tensor(feat_map)
    inst_gathered = dist_all_gather_tensor(inst)

    # Rows are accumulated in per-label lists and concatenated once at the
    # end, which avoids re-copying the arrays for every instance.
    rows = {i: [] for i in range(label_nc)}
    if is_master():
        all_feat_map = torch.cat(feature_map_gather, 0)
        all_inst_map = torch.cat(inst_gathered, 0)
        # Scan through the batches, one sample at a time.
        for n in range(all_feat_map.size()[0]):
            sample_feat = all_feat_map[n:(n + 1), :, :, :]
            sample_inst = all_inst_map[n:(n + 1), :, :, :]
            fh, fw = sample_feat.size()[2:]
            inst_np = sample_inst.cpu().numpy().astype(int)
            for i in np.unique(inst_np):
                if is_cityscapes and i >= 1000:
                    # Instance ids of 1000 and above encode the label in
                    # their leading digits (e.g. 26001 -> 26).
                    label = i // 1000
                else:
                    label = i
                idx = (sample_inst == int(i)).nonzero()
                num = idx.size()[0]
                # We will just pick the middle pixel as its representative
                # feature.
                # We expect b=0 and c=0 as the number of samples per
                # processing is 1 and the channel number of the instance
                # map is 1.
                b, c, y, x = idx[num // 2, :].tolist()
                val = np.zeros((1, feat_nc + 1))
                # Read all feat_nc channels with one slice instead of one
                # scalar read per channel. narrow() raises if fewer than
                # feat_nc channels are available.
                channels = sample_feat[b, :, y, x].narrow(0, c, feat_nc)
                val[0, :feat_nc] = channels.detach().double().cpu().numpy()
                val[0, feat_nc] = float(num) / (fh * fw)
                rows[int(label)].append(val)

    empty = np.zeros((0, feat_nc + 1))
    return {label: np.concatenate([empty] + label_rows, axis=0)
            for label, label_rows in rows.items()}
def get_edges(t):
    r"""Compute edge maps for a given input instance map.

    A pixel is marked as an edge (1.0) when its value differs from its
    left, right, upper or lower neighbor; all other pixels are 0.0.

    Args:
        t (4D tensor): Input instance map.
    Returns:
        (4D tensor): Output edge map, a float tensor of the same size and
        on the same device as ``t``.
    """
    # Allocate on the device of the input so that CPU tensors (and tensors
    # on a non-default GPU) are handled as well.
    edge = torch.zeros(t.size(), dtype=torch.uint8, device=t.device)
    # Differences between horizontally / vertically adjacent pixels.
    horizontal = (t[:, :, :, 1:] != t[:, :, :, :-1]).byte()
    vertical = (t[:, :, 1:, :] != t[:, :, :-1, :]).byte()
    # Both pixels of a differing pair are marked as edge pixels.
    edge[:, :, :, 1:] = edge[:, :, :, 1:] | horizontal
    edge[:, :, :, :-1] = edge[:, :, :, :-1] | horizontal
    edge[:, :, 1:, :] = edge[:, :, 1:, :] | vertical
    edge[:, :, :-1, :] = edge[:, :, :-1, :] | vertical
    return edge.float()
def get_train_params(net, param_names_start_with=None,
                     param_names_include=None):
    r"""Get train parameters.

    Only the learnable parameters of ``net`` are considered; buffers (for
    example the running statistics of normalization layers) are never
    returned, even if their names match.

    Args:
        net (obj): Network object.
        param_names_start_with (list of strings): Params whose names
            start with any of the strings will be trained. ``None`` is
            treated as an empty list.
        param_names_include (list of strings): Params whose names include
            any of the strings will be trained. ``None`` is treated as an
            empty list.
    Returns:
        (list): The selected parameters of ``net``, in the order reported
        by ``net.named_parameters()``.
    """
    prefixes = tuple(param_names_start_with or ())
    substrings = tuple(param_names_include or ())

    params_to_train = []
    matched_names = set()
    # Iterate through all params in the network and check if we need to
    # train it.
    for key, param in net.named_parameters():
        # The 'module.' / 'averaged_model.' parts are ignored for matching.
        key_s = key.replace('module.', '').replace('averaged_model.', '')
        # If the param name starts with the target string, we will train
        # this param.
        hits = [name for name in prefixes if key_s.startswith(name)]
        if hits:
            matched_names.update(hits)
        else:
            # Otherwise, if the param name includes the target string,
            # we will also train it. The reported name is the param name
            # truncated right after the first occurrence of the match.
            hits = [name for name in substrings if name in key_s]
            matched_names.update(
                key_s[:(key_s.find(name) + len(name))] for name in hits)
        # If we decide to train the param, add it to the list to train.
        if hits:
            params_to_train.append(param)
    print('Training layers: ', sorted(matched_names))
    return params_to_train
def get_optimizer_with_params(cfg, net_G, net_D, param_names_start_with=[],
                              param_names_include=[]):
    r"""Build the generator and discriminator optimizers and wrap them.

    Args:
        cfg (obj): Global config.
        net_G (obj): Generator network.
        net_D (obj): Discriminator network.
        param_names_start_with (list of strings): Params whose names
            start with any of the strings will be trained.
        param_names_include (list of strings): Params whose names include
            any of the strings will be trained.
    Returns:
        The result of ``wrap_model_and_optimizer`` for the two networks and
        their optimizers.
    """
    # With both name lists empty the whole generator is trained; otherwise
    # only the generator params selected by the name lists are.
    train_everything = not (param_names_start_with or param_names_include)
    if train_everything:
        gen_params = net_G.parameters()
    else:
        gen_params = get_train_params(
            net_G, param_names_start_with, param_names_include)

    opt_G = get_optimizer_for_params(cfg.gen_opt, gen_params)
    opt_D = get_optimizer(cfg.dis_opt, net_D)
    return wrap_model_and_optimizer(cfg, net_G, net_D, opt_G, opt_D)
|