Spaces:

venite
/

sat3density

Runtime error

App Files Files Community

sat3density / imaginaire /model_utils /pix2pixHD.py

venite

initial

f670afc over 1 year ago

raw

history blame contribute delete

9.5 kB

	# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	#
	# This work is made available under the Nvidia Source Code License-NC.
	# To view a copy of this license, check out LICENSE.md
	"""Utils for the pix2pixHD model."""
	import numpy as np
	import torch

	from imaginaire.utils.data import get_paired_input_label_channel_number
	from imaginaire.utils.distributed import dist_all_gather_tensor, is_master
	from imaginaire.utils.distributed import master_only_print as print
	from imaginaire.utils.trainer import (get_optimizer, get_optimizer_for_params,
	wrap_model_and_optimizer)
	from sklearn.cluster import KMeans


	def cluster_features(cfg, train_data_loader, net_E,
	preprocess=None, small_ratio=0.0625, is_cityscapes=True):
	r"""Use clustering to compute the features.

	Args:
	cfg (obj): Global configuration file.
	train_data_loader (obj): Dataloader for iterate through the training
	set.
	net_E (nn.Module): Pytorch network.
	preprocess (function): Pre-processing function.
	small_ratio (float): We only consider instance that at least occupy
	$(small_ratio) amount of image space.
	is_cityscapes (bool): Is this is the cityscape dataset? In the
	Cityscapes dataset, the instance labels for car start with 26001,
	26002, ...

	Returns:
	( num_labels x num_cluster_centers x feature_dims): cluster centers.
	"""
	# Encode features.
	label_nc = get_paired_input_label_channel_number(cfg.data)
	feat_nc = cfg.gen.enc.num_feat_channels
	n_clusters = getattr(cfg.gen.enc, 'num_clusters', 10)
	# Compute features.
	features = {}
	for label in range(label_nc):
	features[label] = np.zeros((0, feat_nc + 1))
	for data in train_data_loader:
	if preprocess is not None:
	data = preprocess(data)
	feat = encode_features(net_E, feat_nc, label_nc,
	data['images'], data['instance_maps'],
	is_cityscapes)
	# We only collect the feature vectors for the master GPU.
	if is_master():
	for label in range(label_nc):
	features[label] = np.append(
	features[label], feat[label], axis=0)
	# Clustering.
	# We only perform clustering for the master GPU.
	if is_master():
	for label in range(label_nc):
	feat = features[label]
	# We only consider segments that are greater than a pre-set
	# threshold.
	feat = feat[feat[:, -1] > small_ratio, :-1]
	if feat.shape[0]:
	n_clusters = min(feat.shape[0], n_clusters)
	kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(feat)
	n, d = kmeans.cluster_centers_.shape
	this_cluster = getattr(net_E, 'cluster_%d' % label)
	this_cluster[0:n, :] = torch.Tensor(
	kmeans.cluster_centers_).float()


	def encode_features(net_E, feat_nc, label_nc, image, inst,
	is_cityscapes=True):
	r"""Compute feature embeddings for an image image.
	TODO(Ting-Chun): To make this funciton dataset independent.

	Args:
	net_E (nn.Module): The encoder network.
	feat_nc (int): Feature dimensions
	label_nc (int): Number of segmentation labels.
	image (tensor): Input image tensor.
	inst (tensor): Input instance map.
	is_cityscapes (bool): Is this is the cityscape dataset? In the
	Cityscapes dataset, the instance labels for car start with 26001,
	26002, ...
	Returns:
	(list of list of numpy vectors): We will have $(label_nc)
	list. For each list, it will record a list of feature vectors of
	dimension $(feat_nc+1) where the first $(feat_nc) dimensions is
	the representative feature of an instance and the last dimension
	is the proportion.
	"""
	# h, w = inst.size()[2:]
	feat_map = net_E(image, inst)
	feature_map_gather = dist_all_gather_tensor(feat_map)
	inst_gathered = dist_all_gather_tensor(inst)
	# Initialize the cluster centers.
	# For each feature vector,
	# 0:feat_nc will be the feature vector.
	# The feat_nc dimension record the percentage of the instance.
	feature = {}
	for i in range(label_nc):
	feature[i] = np.zeros((0, feat_nc + 1))
	if is_master():
	all_feat_map = torch.cat(feature_map_gather, 0)
	all_inst_map = torch.cat(inst_gathered, 0)
	# Scan through the batches.
	for n in range(all_feat_map.size()[0]):
	feat_map = all_feat_map[n:(n + 1), :, :, :]
	inst = all_inst_map[n:(n + 1), :, :, :]
	fh, fw = feat_map.size()[2:]
	inst_np = inst.cpu().numpy().astype(int)
	for i in np.unique(inst_np):
	if is_cityscapes:
	label = i if i < 1000 else i // 1000
	else:
	label = i
	idx = (inst == int(i)).nonzero()
	num = idx.size()[0]
	# We will just pick the middle pixel as its representative
	# feature.
	idx = idx[num // 2, :]
	val = np.zeros((1, feat_nc + 1))
	for k in range(feat_nc):
	# We expect idx[0]=0 and idx[1]=0 as the number of sample
	# per processing is 1 (idx[0]=0) and the channel number of
	# the instance map is 1.
	val[0, k] = feat_map[
	idx[0], idx[1] + k, idx[2], idx[3]].item()
	val[0, feat_nc] = float(num) / (fh * fw)
	feature[label] = np.append(feature[label], val, axis=0)
	return feature
	else:
	return feature


	def get_edges(t):
	r""" Compute edge maps for a given input instance map.

	Args:
	t (4D tensor): Input instance map.
	Returns:
	(4D tensor): Output edge map.
	"""
	edge = torch.cuda.ByteTensor(t.size()).zero_()
	edge[:, :, :, 1:] = edge[:, :, :, 1:] \| (
	t[:, :, :, 1:] != t[:, :, :, :-1]).byte()
	edge[:, :, :, :-1] = edge[:, :, :, :-1] \| (
	t[:, :, :, 1:] != t[:, :, :, :-1]).byte()
	edge[:, :, 1:, :] = edge[:, :, 1:, :] \| (
	t[:, :, 1:, :] != t[:, :, :-1, :]).byte()
	edge[:, :, :-1, :] = edge[:, :, :-1, :] \| (
	t[:, :, 1:, :] != t[:, :, :-1, :]).byte()
	return edge.float()


	def get_train_params(net, param_names_start_with=[], param_names_include=[]):
	r"""Get train parameters.

	Args:
	net (obj): Network object.
	param_names_start_with (list of strings): Params whose names
	start with any of the strings will be trained.
	param_names_include (list of strings): Params whose names include
	any of the strings will be trained.
	"""
	params_to_train = []
	params_dict = net.state_dict()
	list_of_param_names_to_train = set()
	# Iterate through all params in the network and check if we need to
	# train it.
	for key, value in params_dict.items():
	do_train = False
	# If the param name starts with the target string (excluding
	# the 'module' part etc), we will train this param.
	key_s = key.replace('module.', '').replace('averaged_model.', '')
	for param_name in param_names_start_with:
	if key_s.startswith(param_name):
	do_train = True
	list_of_param_names_to_train.add(param_name)

	# Otherwise, if the param name includes the target string,
	# we will also train it.
	if not do_train:
	for param_name in param_names_include:
	if param_name in key_s:
	do_train = True
	full_param_name = \
	key_s[:(key_s.find(param_name) + len(param_name))]
	list_of_param_names_to_train.add(full_param_name)

	# If we decide to train the param, add it to the list to train.
	if do_train:
	module = net
	key_list = key.split('.')
	for k in key_list:
	module = getattr(module, k)
	params_to_train += [module]

	print('Training layers: ', sorted(list_of_param_names_to_train))
	return params_to_train


	def get_optimizer_with_params(cfg, net_G, net_D, param_names_start_with=[],
	param_names_include=[]):
	r"""Return the optimizer object.

	Args:
	cfg (obj): Global config.
	net_G (obj): Generator network.
	net_D (obj): Discriminator network.
	param_names_start_with (list of strings): Params whose names
	start with any of the strings will be trained.
	param_names_include (list of strings): Params whose names include
	any of the strings will be trained.
	"""
	# If any of the param name lists is not empty, will only train
	# these params. Otherwise will train the entire network (all params).
	if param_names_start_with or param_names_include:
	params = get_train_params(net_G, param_names_start_with,
	param_names_include)
	else:
	params = net_G.parameters()

	opt_G = get_optimizer_for_params(cfg.gen_opt, params)
	opt_D = get_optimizer(cfg.dis_opt, net_D)
	return wrap_model_and_optimizer(cfg, net_G, net_D, opt_G, opt_D)