# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
"""Utils for the pix2pixHD model."""
import numpy as np
import torch
from sklearn.cluster import KMeans

from imaginaire.utils.data import get_paired_input_label_channel_number
from imaginaire.utils.distributed import dist_all_gather_tensor, is_master
from imaginaire.utils.distributed import master_only_print as print
from imaginaire.utils.trainer import (get_optimizer, get_optimizer_for_params,
                                      wrap_model_and_optimizer)


def cluster_features(cfg, train_data_loader, net_E,
                     preprocess=None, small_ratio=0.0625, is_cityscapes=True):
    r"""Use clustering to compute the features.

    Args:
        cfg (obj): Global configuration file.
        train_data_loader (obj): Dataloader for iterate through the training
            set.
        net_E (nn.Module): Pytorch network.
        preprocess (function): Pre-processing function.
        small_ratio (float): We only consider instance that at least occupy
            $(small_ratio) amount of image space.
        is_cityscapes (bool): Is this is the cityscape dataset? In the
            Cityscapes dataset, the instance labels for car start with 26001,
            26002, ...

    Returns:
        ( num_labels x num_cluster_centers x feature_dims): cluster centers.
    """
    # Encode features.
    label_nc = get_paired_input_label_channel_number(cfg.data)
    feat_nc = cfg.gen.enc.num_feat_channels
    n_clusters = getattr(cfg.gen.enc, 'num_clusters', 10)
    # Compute features.
    features = {}
    for label in range(label_nc):
        features[label] = np.zeros((0, feat_nc + 1))
    for data in train_data_loader:
        if preprocess is not None:
            data = preprocess(data)
        feat = encode_features(net_E, feat_nc, label_nc,
                               data['images'], data['instance_maps'],
                               is_cityscapes)
        # We only collect the feature vectors for the master GPU.
        if is_master():
            for label in range(label_nc):
                features[label] = np.append(
                    features[label], feat[label], axis=0)
    # Clustering.
    # We only perform clustering for the master GPU.
    if is_master():
        for label in range(label_nc):
            feat = features[label]
            # We only consider segments whose area fraction exceeds the
            # pre-set threshold.
            feat = feat[feat[:, -1] > small_ratio, :-1]
            if feat.shape[0]:
                # Do not request more clusters than there are samples for
                # this label, without overwriting the global setting.
                num_clusters = min(feat.shape[0], n_clusters)
                kmeans = KMeans(n_clusters=num_clusters,
                                random_state=0).fit(feat)
                n = kmeans.cluster_centers_.shape[0]
                this_cluster = getattr(net_E, 'cluster_%d' % label)
                this_cluster[0:n, :] = torch.Tensor(
                    kmeans.cluster_centers_).float()
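

# A minimal, self-contained sketch of the per-label clustering step in
# cluster_features() on synthetic data. The feature matrix layout matches
# encode_features(): the first feat_nc columns hold the feature vector and
# the last column holds the fraction of the image the instance occupies.
# All names here are illustrative, not part of the imaginaire API.
def _example_cluster_one_label(feat_nc=3, n_clusters=10, small_ratio=0.0625):
    rng = np.random.RandomState(0)
    feat = rng.rand(50, feat_nc + 1)  # 50 instances of one label.
    # Keep only instances that occupy at least small_ratio of the image.
    feat = feat[feat[:, -1] > small_ratio, :-1]
    num_clusters = min(feat.shape[0], n_clusters)
    kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(feat)
    return kmeans.cluster_centers_  # Shape: (num_clusters, feat_nc).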


def encode_features(net_E, feat_nc, label_nc, image, inst,
                    is_cityscapes=True):
    r"""Compute feature embeddings for an image image.
    TODO(Ting-Chun): To make this funciton dataset independent.

    Args:
        net_E (nn.Module): The encoder network.
        feat_nc (int): Feature dimensions
        label_nc (int): Number of segmentation labels.
        image (tensor): Input image tensor.
        inst (tensor): Input instance map.
        is_cityscapes (bool): Is this is the cityscape dataset? In the
            Cityscapes dataset, the instance labels for car start with 26001,
            26002, ...
    Returns:
        (list of list of numpy vectors): We will have $(label_nc)
            list. For each list, it will record a list of feature vectors of
            dimension $(feat_nc+1) where the first $(feat_nc) dimensions is
            the representative feature of an instance and the last dimension
            is the proportion.
    """
    feat_map = net_E(image, inst)
    feature_map_gather = dist_all_gather_tensor(feat_map)
    inst_gathered = dist_all_gather_tensor(inst)
    # Initialize the per-label feature containers.
    # For each feature vector,
    #   dimensions 0:feat_nc hold the feature values, and
    #   dimension feat_nc records the area fraction of the instance.
    feature = {}
    for i in range(label_nc):
        feature[i] = np.zeros((0, feat_nc + 1))
    if is_master():
        all_feat_map = torch.cat(feature_map_gather, 0)
        all_inst_map = torch.cat(inst_gathered, 0)
        # Scan through the batches.
        for n in range(all_feat_map.size()[0]):
            feat_map = all_feat_map[n:(n + 1), :, :, :]
            inst = all_inst_map[n:(n + 1), :, :, :]
            fh, fw = feat_map.size()[2:]
            inst_np = inst.cpu().numpy().astype(int)
            for i in np.unique(inst_np):
                if is_cityscapes:
                    label = i if i < 1000 else i // 1000
                else:
                    label = i
                idx = (inst == int(i)).nonzero()
                num = idx.size()[0]
                # We will just pick the middle pixel as its representative
                # feature.
                idx = idx[num // 2, :]
                val = np.zeros((1, feat_nc + 1))
                for k in range(feat_nc):
                    # idx[0] (batch index) and idx[1] (channel index) are
                    # both 0 here, since we process one sample at a time
                    # and the instance map has a single channel.
                    val[0, k] = feat_map[
                        idx[0], idx[1] + k, idx[2], idx[3]].item()
                val[0, feat_nc] = float(num) / (fh * fw)
                feature[label] = np.append(feature[label], val, axis=0)
    return feature
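

# A minimal sketch (illustrative only) of the Cityscapes instance-id
# convention used in encode_features(): ids below 1000 are plain semantic
# labels, while instance ids encode the semantic label as id // 1000
# (e.g. car instances are 26001, 26002, ...).
def _example_cityscapes_label_mapping():
    inst_ids = np.array([23, 26001, 26002, 33004])
    labels = [i if i < 1000 else i // 1000 for i in inst_ids]
    return labels  # [23, 26, 26, 33]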


def get_edges(t):
    r""" Compute edge maps for a given input instance map.

    Args:
        t (4D tensor): Input instance map.
    Returns:
        (4D tensor): Output edge map.
    """
    # Mark a pixel as an edge if it differs from a horizontal or vertical
    # neighbor. zeros_like keeps the result on the same device as t, so
    # this also works for CPU tensors (the legacy torch.cuda.ByteTensor
    # constructor required CUDA).
    edge = torch.zeros_like(t, dtype=torch.uint8)
    edge[:, :, :, 1:] = edge[:, :, :, 1:] | (
        t[:, :, :, 1:] != t[:, :, :, :-1]).byte()
    edge[:, :, :, :-1] = edge[:, :, :, :-1] | (
        t[:, :, :, 1:] != t[:, :, :, :-1]).byte()
    edge[:, :, 1:, :] = edge[:, :, 1:, :] | (
        t[:, :, 1:, :] != t[:, :, :-1, :]).byte()
    edge[:, :, :-1, :] = edge[:, :, :-1, :] | (
        t[:, :, 1:, :] != t[:, :, :-1, :]).byte()
    return edge.float()
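

# A minimal sketch of get_edges() on a tiny instance map. With the
# zeros_like initialization above, no GPU is required.
def _example_get_edges():
    inst = torch.tensor([[[[1, 1, 2, 2],
                           [1, 1, 2, 2],
                           [3, 3, 3, 3],
                           [3, 3, 3, 3]]]])
    return get_edges(inst)  # 1.0 along instance boundaries, 0.0 elsewhere.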


def get_train_params(net, param_names_start_with=(), param_names_include=()):
    r"""Get train parameters.

    Args:
        net (obj): Network object.
        param_names_start_with (list of strings): Params whose names
            start with any of the strings will be trained.
        param_names_include (list of strings): Params whose names include
            any of the strings will be trained.
    """
    params_to_train = []
    params_dict = net.state_dict()
    list_of_param_names_to_train = set()
    # Iterate through all params in the network and check if we need to
    # train it.
    for key, value in params_dict.items():
        do_train = False
        # If the param name starts with the target string (after stripping
        # wrapper prefixes such as 'module.' and 'averaged_model.'), we
        # will train this param.
        key_s = key.replace('module.', '').replace('averaged_model.', '')
        for param_name in param_names_start_with:
            if key_s.startswith(param_name):
                do_train = True
                list_of_param_names_to_train.add(param_name)

        # Otherwise, if the param name includes the target string,
        # we will also train it.
        if not do_train:
            for param_name in param_names_include:
                if param_name in key_s:
                    do_train = True
                    full_param_name = \
                        key_s[:(key_s.find(param_name) + len(param_name))]
                    list_of_param_names_to_train.add(full_param_name)

        # If we decide to train the param, add it to the list to train.
        if do_train:
            module = net
            key_list = key.split('.')
            for k in key_list:
                module = getattr(module, k)
            params_to_train += [module]

    print('Training layers: ', sorted(list_of_param_names_to_train))
    return params_to_train
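

# A minimal sketch of get_train_params() on a toy network (hypothetical
# module names). Only the two parameters of 'conv_img' (weight and bias)
# match the prefix; 'conv_seg' and 'fc' are left frozen.
def _example_get_train_params():
    import torch.nn as nn
    net = nn.Sequential()
    net.add_module('conv_img', nn.Conv2d(3, 8, 3))
    net.add_module('conv_seg', nn.Conv2d(8, 8, 3))
    net.add_module('fc', nn.Linear(8, 2))
    params = get_train_params(net, param_names_start_with=['conv_img'])
    assert len(params) == 2
    return params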


def get_optimizer_with_params(cfg, net_G, net_D, param_names_start_with=(),
                              param_names_include=()):
    r"""Return the optimizer object.

    Args:
        cfg (obj): Global config.
        net_G (obj): Generator network.
        net_D (obj): Discriminator network.
        param_names_start_with (list of strings): Params whose names
            start with any of the strings will be trained.
        param_names_include (list of strings): Params whose names include
            any of the strings will be trained.
    """
    # If either param name list is non-empty, we only train the matching
    # generator params. Otherwise, we train the entire generator.
    if param_names_start_with or param_names_include:
        params = get_train_params(net_G, param_names_start_with,
                                  param_names_include)
    else:
        params = net_G.parameters()

    opt_G = get_optimizer_for_params(cfg.gen_opt, params)
    opt_D = get_optimizer(cfg.dis_opt, net_D)
    return wrap_model_and_optimizer(cfg, net_G, net_D, opt_G, opt_D)
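

# A minimal sketch of selective fine-tuning via get_optimizer_with_params().
# The prefix 'weight_generator' is purely illustrative; cfg must provide the
# usual gen_opt and dis_opt optimizer settings.
def _example_selective_finetune(cfg, net_G, net_D):
    return get_optimizer_with_params(
        cfg, net_G, net_D, param_names_start_with=['weight_generator'])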