# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This file contains code to get a sample from a dataset."""

import functools

import numpy as np
import tensorflow as tf

from deeplab2 import common
from deeplab2.data import dataset_utils
from deeplab2.data.preprocessing import input_preprocessing as preprocessing


def _compute_gaussian_from_std(sigma):
  """Computes the Gaussian and its size from a given standard deviation."""
  size = int(6 * sigma + 3)
  # Use the builtin `float` dtype; the `np.float` alias was removed in
  # NumPy 1.24.
  x = np.arange(size, dtype=float)
  y = x[:, np.newaxis]
  x0, y0 = 3 * sigma + 1, 3 * sigma + 1
  return np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2)), size
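
# Illustrative note (sketch): with the default sigma=8 used by the generator
# below, _compute_gaussian_from_std(8) returns a 51x51 kernel
# (size = 6 * 8 + 3 = 51) whose peak value of 1.0 lies at (x0, y0) = (25, 25).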


class PanopticSampleGenerator:
  """This class generates samples from images and labels."""

  def __init__(self,
               dataset_info,
               is_training,
               crop_size,
               min_resize_value=None,
               max_resize_value=None,
               resize_factor=None,
               min_scale_factor=1.,
               max_scale_factor=1.,
               scale_factor_step_size=0,
               autoaugment_policy_name=None,
               only_semantic_annotations=False,
               thing_id_mask_annotations=False,
               max_thing_id=128,
               sigma=8,
               focus_small_instances=None):
"""Initializes the panoptic segmentation generator. | |
Args: | |
dataset_info: A dictionary with the following keys. | |
- `name`: String, dataset name. | |
- `ignore_label`: Integer, ignore label. | |
- `class_has_instances_list`: A list of integers indicating which | |
class has instance annotations. | |
- `panoptic_label_divisor`: Integer, panoptic label divisor. | |
- `num_classes`: Integer, number of classes. | |
- `is_video_dataset`: Boolean, is video dataset or not. | |
is_training: Boolean, is training mode or not. | |
crop_size: Image crop size [height, width]. | |
min_resize_value: A 2-tuple of (height, width), desired minimum value | |
after resize. If a single element is given, then height and width share | |
the same value. None, empty or having 0 indicates no minimum value will | |
be used. | |
max_resize_value: A 2-tuple of (height, width), maximum allowed value | |
after resize. If a single element is given, then height and width | |
share the same value. None, empty or having 0 indicates no maximum | |
value will be used. | |
resize_factor: Resized dimensions are multiple of factor plus one. | |
min_scale_factor: Minimum scale factor for random scale augmentation. | |
max_scale_factor: Maximum scale factor for random scale augmentation. | |
scale_factor_step_size: The step size from min scale factor to max scale | |
factor. The input is randomly scaled based on the value of | |
(min_scale_factor, max_scale_factor, scale_factor_step_size). | |
autoaugment_policy_name: String, autoaugment policy name. See | |
autoaugment_policy.py for available policies. | |
only_semantic_annotations: An optional flag indicating whether the model | |
needs only semantic annotations (default: False). | |
thing_id_mask_annotations: An optional flag indicating whether the model | |
needs thing_id_mask annotations. When `thing_id_mask_annotations` is | |
True, we will additionally return mask annotation for each `thing` | |
instance, encoded with a unique thing_id. This ground-truth annotation | |
could be used to learn a better segmentation mask for each instance. | |
`thing_id` indicates the number of unique thing-ID to each instance in | |
an image, starting the counting from 0 (default: False). | |
max_thing_id: The maximum number of possible thing instances per image. It | |
is used together when thing_id_mask_annotations = True, representing the | |
maximum thing ID encoded in the thing_id_mask. (default: 128). | |
sigma: The standard deviation of the Gaussian used to encode the center | |
keypoint (default: 8). | |
focus_small_instances: An optional dict that defines how to deal with | |
small instances (default: None): | |
-`threshold`: An integer defining the threshold pixel number for an | |
instance to be considered small. | |
-`weight`: A number that defines the loss weight for small instances. | |
""" | |
    self._dataset_info = dataset_info
    self._ignore_label = self._dataset_info['ignore_label']
    self._only_semantic_annotations = only_semantic_annotations
    self._sigma = sigma
    self._instance_area_threshold = 0
    self._small_instance_weight = 1.0
    self._thing_id_mask_annotations = thing_id_mask_annotations
    self._max_thing_id = max_thing_id
    self._is_training = is_training
    self._preprocessing_fn = functools.partial(
        preprocessing.preprocess_image_and_label,
        crop_height=crop_size[0],
        crop_width=crop_size[1],
        min_resize_value=min_resize_value,
        max_resize_value=max_resize_value,
        resize_factor=resize_factor,
        min_scale_factor=min_scale_factor,
        max_scale_factor=max_scale_factor,
        scale_factor_step_size=scale_factor_step_size,
        autoaugment_policy_name=autoaugment_policy_name,
        ignore_label=self._ignore_label *
        self._dataset_info['panoptic_label_divisor'],
        is_training=self._is_training)

    if focus_small_instances is not None:
      self._instance_area_threshold = focus_small_instances['threshold']
      self._small_instance_weight = focus_small_instances['weight']

    self._gaussian, self._gaussian_size = _compute_gaussian_from_std(
        self._sigma)
    self._gaussian = tf.cast(tf.reshape(self._gaussian, [-1]), tf.float32)
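
  # Illustrative construction sketch (values are assumptions for a
  # Cityscapes-like setup with 19 classes, thing classes 11-18 and a panoptic
  # label divisor of 1000):
  #
  #   dataset_info = {
  #       'name': 'cityscapes_panoptic',
  #       'ignore_label': 255,
  #       'class_has_instances_list': list(range(11, 19)),
  #       'panoptic_label_divisor': 1000,
  #       'num_classes': 19,
  #       'is_video_dataset': False,
  #   }
  #   generator = PanopticSampleGenerator(
  #       dataset_info, is_training=True, crop_size=[1025, 2049])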

  def __call__(self, sample_dict):
    """Gets a sample.

    Args:
      sample_dict: A dictionary with the following keys and values:
        - `image`: A tensor of shape [image_height, image_width, 3].
        - `image_name`: String, image name.
        - `label`: A tensor of shape [label_height, label_width, 1] or None.
        - `height`: An integer specifying the height of the image.
        - `width`: An integer specifying the width of the image.
        - `sequence`: An optional string specifying the sequence name.
        - `prev_image`: An optional tensor of the same shape as `image`.
        - `prev_label`: An optional tensor of the same shape as `label`.
        - `next_image`: An optional next-frame tensor of the shape of `image`.
        - `next_label`: An optional next-frame tensor of the shape of `label`.

    Returns:
      sample: A dictionary storing required data for panoptic segmentation.
    """
    return self.call(**sample_dict)

  def call(self,
           image,
           image_name,
           label,
           height,
           width,
           sequence='',
           prev_image=None,
           prev_label=None,
           next_image=None,
           next_label=None):
"""Gets a sample. | |
Args: | |
image: A tensor of shape [image_height, image_width, 3]. | |
image_name: String, image name. | |
label: A tensor of shape [label_height, label_width, 1] or None. | |
height: An integer specifying the height of the image. | |
width: An integer specifying the width of the image. | |
sequence: An optional string specifying the sequence name. | |
prev_image: An optional tensor of shape [image_height, image_width, 3]. | |
prev_label: An optional tensor of shape [label_height, label_width, 1]. | |
next_image: An optional tensor of shape [image_height, image_width, 3]. | |
next_label: An optional tensor of shape [label_height, label_width, 1]. | |
Returns: | |
sample: A dictionary storing required data for panoptic segmentation. | |
Raises: | |
ValueError: An error occurs when the label shape is invalid. | |
NotImplementedError: An error occurs when thing_id_mask_annotations comes | |
together with prev_image or prev_label, not currently implemented. | |
""" | |
    if label is not None:
      label.get_shape().assert_is_compatible_with(
          tf.TensorShape([None, None, 1]))
      original_label = tf.cast(label, dtype=tf.int32, name='original_label')
      if next_label is not None:
        original_next_label = tf.cast(
            next_label, dtype=tf.int32, name='original_next_label')
    # Reusing the preprocessing function for both next and prev samples.
    if next_image is not None:
      resized_image, image, label, next_image, next_label = (
          self._preprocessing_fn(
              image, label, prev_image=next_image, prev_label=next_label))
    else:
      resized_image, image, label, prev_image, prev_label = (
          self._preprocessing_fn(
              image, label, prev_image=prev_image, prev_label=prev_label))

    sample = {
        common.IMAGE: image
    }
    if prev_image is not None:
      sample[common.IMAGE] = tf.concat([image, prev_image], axis=2)
    if next_image is not None:
      sample[common.NEXT_IMAGE] = next_image
      sample[common.IMAGE] = tf.concat([image, next_image], axis=2)
    if label is not None:
      # Panoptic label for crowd regions will be ignore_label.
      semantic_label, panoptic_label, thing_mask, crowd_region = (
          dataset_utils.get_semantic_and_panoptic_label(
              self._dataset_info, label, self._ignore_label))
      sample[common.GT_SEMANTIC_KEY] = tf.squeeze(semantic_label, axis=2)
      semantic_weights = tf.ones_like(semantic_label, dtype=tf.float32)
      sample[common.SEMANTIC_LOSS_WEIGHT_KEY] = tf.squeeze(
          semantic_weights, axis=2)
      sample[common.GT_IS_CROWD] = tf.squeeze(crowd_region, axis=2)

      if not self._only_semantic_annotations:
        # The sample will have the original label including crowd regions.
        sample[common.GT_PANOPTIC_KEY] = tf.squeeze(label, axis=2)
        # Compute center loss for all non-crowd and non-ignore pixels.
        non_crowd_and_non_ignore_regions = tf.logical_and(
            tf.logical_not(crowd_region),
            tf.not_equal(semantic_label, self._ignore_label))
        sample[common.CENTER_LOSS_WEIGHT_KEY] = tf.squeeze(tf.cast(
            non_crowd_and_non_ignore_regions, tf.float32), axis=2)
        # Compute regression loss only for thing pixels that are not crowd.
        non_crowd_things = tf.logical_and(
            tf.logical_not(crowd_region), thing_mask)
        sample[common.REGRESSION_LOSS_WEIGHT_KEY] = tf.squeeze(tf.cast(
            non_crowd_things, tf.float32), axis=2)

        prev_panoptic_label = None
        next_panoptic_label = None
        if prev_label is not None:
          _, prev_panoptic_label, _, _ = (
              dataset_utils.get_semantic_and_panoptic_label(
                  self._dataset_info, prev_label, self._ignore_label))
        if next_label is not None:
          _, next_panoptic_label, _, _ = (
              dataset_utils.get_semantic_and_panoptic_label(
                  self._dataset_info, next_label, self._ignore_label))
        (sample[common.GT_INSTANCE_CENTER_KEY],
         sample[common.GT_INSTANCE_REGRESSION_KEY],
         sample[common.SEMANTIC_LOSS_WEIGHT_KEY],
         prev_center_map,
         frame_center_offsets,
         next_offset) = self._generate_gt_center_and_offset(
             panoptic_label, semantic_weights, prev_panoptic_label,
             next_panoptic_label)

        sample[common.GT_INSTANCE_REGRESSION_KEY] = tf.cast(
            sample[common.GT_INSTANCE_REGRESSION_KEY], tf.float32)

        if next_label is not None:
          sample[common.GT_NEXT_INSTANCE_REGRESSION_KEY] = tf.cast(
              next_offset, tf.float32)
          sample[common.NEXT_REGRESSION_LOSS_WEIGHT_KEY] = tf.cast(
              tf.greater(tf.reduce_sum(tf.abs(next_offset), axis=2), 0),
              tf.float32)

        # Only squeeze center map and semantic loss weights, as regression map
        # has two channels (x and y offsets).
        sample[common.GT_INSTANCE_CENTER_KEY] = tf.squeeze(
            sample[common.GT_INSTANCE_CENTER_KEY], axis=2)
        sample[common.SEMANTIC_LOSS_WEIGHT_KEY] = tf.squeeze(
            sample[common.SEMANTIC_LOSS_WEIGHT_KEY], axis=2)

        if prev_label is not None:
          sample[common.GT_FRAME_OFFSET_KEY] = frame_center_offsets
          sample[common.GT_FRAME_OFFSET_KEY] = tf.cast(
              sample[common.GT_FRAME_OFFSET_KEY], tf.float32)
          frame_offsets_present = tf.logical_or(
              tf.not_equal(frame_center_offsets[..., 0], 0),
              tf.not_equal(frame_center_offsets[..., 1], 0))
          sample[common.FRAME_REGRESSION_LOSS_WEIGHT_KEY] = tf.cast(
              frame_offsets_present, tf.float32)
          if self._is_training:
            sample[common.IMAGE] = tf.concat(
                [sample[common.IMAGE], prev_center_map], axis=2)
        if self._thing_id_mask_annotations:
          if any([prev_image is not None,
                  prev_label is not None,
                  next_image is not None,
                  next_label is not None]):
            raise NotImplementedError(
                'Current implementation of Max-DeepLab does not support '
                + 'prev_image, prev_label, next_image, or next_label.')
          thing_id_mask, thing_id_class = (
              self._generate_thing_id_mask_and_class(
                  panoptic_label, non_crowd_things))
          sample[common.GT_THING_ID_MASK_KEY] = tf.squeeze(
              thing_id_mask, axis=2)
          sample[common.GT_THING_ID_CLASS_KEY] = thing_id_class

    if not self._is_training:
      # Resized image is only used during visualization.
      sample[common.RESIZED_IMAGE] = resized_image
      sample[common.IMAGE_NAME] = image_name
      sample[common.GT_SIZE_RAW] = tf.stack([height, width], axis=0)
      if self._dataset_info['is_video_dataset']:
        sample[common.SEQUENCE_ID] = sequence
      # Keep original labels for evaluation.
      if label is not None:
        orig_semantic_label, _, _, orig_crowd_region = (
            dataset_utils.get_semantic_and_panoptic_label(
                self._dataset_info, original_label, self._ignore_label))
        sample[common.GT_SEMANTIC_RAW] = tf.squeeze(orig_semantic_label, axis=2)
        if not self._only_semantic_annotations:
          sample[common.GT_PANOPTIC_RAW] = tf.squeeze(original_label, axis=2)
          sample[common.GT_IS_CROWD_RAW] = tf.squeeze(orig_crowd_region)
          if next_label is not None:
            sample[common.GT_NEXT_PANOPTIC_RAW] = tf.squeeze(
                original_next_label, axis=2)
    return sample
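
  # Typical usage sketch (illustrative; `decoded_dataset` and `generator` are
  # assumed names): the generator is mapped over a decoded tf.data.Dataset
  # whose elements are dicts with the keys documented in `__call__`, e.g.
  #   dataset = decoded_dataset.map(
  #       generator, num_parallel_calls=tf.data.AUTOTUNE)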

  def _generate_thing_id_mask_and_class(self,
                                        panoptic_label,
                                        non_crowd_things):
    """Generates the ground-truth thing-ID masks and their class labels.

    It assigns a unique thing-ID to each thing instance in the image, counting
    from 0, and labels every pixel of thing_id_mask with the thing-ID of the
    instance it belongs to.

    Args:
      panoptic_label: A tf.Tensor of shape [height, width, 1].
      non_crowd_things: A tf.Tensor of shape [height, width, 1], indicating
        non-crowd and thing-class regions.

    Returns:
      thing_id_mask: A tf.Tensor of shape [height, width, 1]. It assigns each
        non-crowd thing instance a unique mask-ID label, starting from 0.
        Unassigned pixels are set to -1.
      thing_id_class: A tf.Tensor of shape [max_thing_id]. It contains the
        semantic ID of each instance assigned to thing_id_mask. The remaining
        (max_thing_id - num_things) elements are set to -1.

    Raises:
      ValueError: An error occurs when the thing-ID mask contains stuff or
        crowd regions.
      ValueError: An error occurs when thing_count is greater than or equal to
        self._max_thing_id.
    """
    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))
    thing_id_mask = -tf.ones_like(panoptic_label)
    thing_id_class = -tf.ones(self._max_thing_id)
    thing_count = 0
    for panoptic_id in unique_ids:
      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
      # Filter out IDs that are not thing instances (i.e., IDs for
      # ignore_label, stuff classes or crowd). Stuff classes and crowd regions
      # both have IDs of the form panoptic_id = semantic_id * label_divisor
      # (i.e., instance id = 0).
      if (semantic_id == self._dataset_info['ignore_label'] or
          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
        continue
      assert_stuff_crowd = tf.debugging.Assert(
          tf.reduce_all(non_crowd_things[panoptic_label == panoptic_id]),
          ['thing-ID mask here must not contain stuff or crowd region.'])
      with tf.control_dependencies([assert_stuff_crowd]):
        panoptic_id = tf.identity(panoptic_id)

      thing_id_mask = tf.where(panoptic_label == panoptic_id,
                               thing_count, thing_id_mask)

      assert_thing_count = tf.debugging.Assert(
          thing_count < self._max_thing_id,
          ['thing_count must be smaller than self._max_thing_id.'])
      with tf.control_dependencies([assert_thing_count]):
        thing_count = tf.identity(thing_count)

      thing_id_class = tf.tensor_scatter_nd_update(
          thing_id_class, [[thing_count]], [semantic_id])
      thing_count += 1
    return thing_id_mask, thing_id_class
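
  # Illustrative outcome (assuming the Cityscapes mapping where class 13 is
  # 'car' and the panoptic label divisor is 1000): an image with two car
  # instances (panoptic IDs 13001 and 13002) yields a thing_id_mask whose car
  # pixels are labeled 0 and 1 respectively, and
  # thing_id_class = [13, 13, -1, ..., -1] of length max_thing_id.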

  def _generate_prev_centers_with_noise(self,
                                        panoptic_label,
                                        offset_noise_factor=0.05,
                                        false_positive_rate=0.2,
                                        false_positive_noise_factor=0.05):
    """Generates noisy center predictions for the previous frame.

    Args:
      panoptic_label: A tf.Tensor of shape [height, width, 1].
      offset_noise_factor: An optional float defining the maximum fraction of
        the object size that is used to displace the previous center.
      false_positive_rate: An optional float indicating the probability with
        which false positives are added.
      false_positive_noise_factor: An optional float defining the maximum
        fraction of the object size that is used to displace the false
        positive center.

    Returns:
      A tuple of (center, unique_ids, ids_to_center_x, ids_to_center_y), where
      `center` is a tf.Tensor of shape [height, width, 1] holding the noisy
      center heatmap, `unique_ids` is a 1-D tf.Tensor with the N unique
      panoptic IDs, and `ids_to_center_x` / `ids_to_center_y` are 1-D int32
      tf.Tensors of length N with the noisy center coordinates for each ID.
    """
    height = tf.shape(panoptic_label)[0]
    width = tf.shape(panoptic_label)[1]

    # Pad center to make boundary handling easier.
    center_pad_begin = int(round(3 * self._sigma + 1))
    center_pad_end = int(round(3 * self._sigma + 2))
    center_pad = center_pad_begin + center_pad_end

    center = tf.zeros((height + center_pad, width + center_pad))
    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))
    ids_to_center_x = tf.zeros_like(unique_ids, dtype=tf.int32)
    ids_to_center_y = tf.zeros_like(unique_ids, dtype=tf.int32)

    for panoptic_id in unique_ids:
      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
      # Filter out IDs that should be ignored, are stuff classes or crowd.
      # Stuff classes and crowd regions both have IDs of the form
      # panoptic_id = semantic_id * label_divisor.
      if (semantic_id == self._dataset_info['ignore_label'] or
          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
        continue

      # Convert [[y0, x0, 0], ...] to [[y0, ...], [x0, ...], [0, ...]].
      mask_index = tf.cast(
          tf.transpose(tf.where(panoptic_label == panoptic_id)), tf.float32)
      centers = tf.reduce_mean(mask_index, axis=1)
      bbox_size = (
          tf.reduce_max(mask_index, axis=1) - tf.reduce_min(mask_index, axis=1))

      # Add noise.
      center_y = (
          centers[0] + tf.random.normal([], dtype=tf.float32) *
          offset_noise_factor * bbox_size[0])
      center_x = (
          centers[1] + tf.random.normal([], dtype=tf.float32) *
          offset_noise_factor * bbox_size[1])

      center_x = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_x), tf.int32), 0), width - 1)
      center_y = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_y), tf.int32), 0), height - 1)

      id_index = tf.where(tf.equal(panoptic_id, unique_ids))
      ids_to_center_x = tf.tensor_scatter_nd_update(
          ids_to_center_x, id_index, tf.expand_dims(center_x, axis=0))
      ids_to_center_y = tf.tensor_scatter_nd_update(
          ids_to_center_y, id_index, tf.expand_dims(center_y, axis=0))

      def add_center_gaussian(center_x_coord, center_y_coord, center):
        # Due to the padding with center_pad_begin in center, the computed
        # center becomes the upper left corner in the center tensor.
        upper_left = center_x_coord, center_y_coord
        bottom_right = (upper_left[0] + self._gaussian_size,
                        upper_left[1] + self._gaussian_size)

        indices_x, indices_y = tf.meshgrid(
            tf.range(upper_left[0], bottom_right[0]),
            tf.range(upper_left[1], bottom_right[1]))
        indices = tf.transpose(
            tf.stack([tf.reshape(indices_y, [-1]),
                      tf.reshape(indices_x, [-1])]))

        return tf.tensor_scatter_nd_max(
            center, indices, self._gaussian, name='center_scatter')

      center = add_center_gaussian(center_x, center_y, center)

      # Generate false positives.
      center_y = (
          tf.cast(center_y, dtype=tf.float32) +
          tf.random.normal([], dtype=tf.float32) * false_positive_noise_factor *
          bbox_size[0])
      center_x = (
          tf.cast(center_x, dtype=tf.float32) +
          tf.random.normal([], dtype=tf.float32) * false_positive_noise_factor *
          bbox_size[1])

      center_x = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_x), tf.int32), 0), width - 1)
      center_y = tf.minimum(
          tf.maximum(tf.cast(tf.round(center_y), tf.int32), 0), height - 1)
      # Draw a sample to decide whether to add a false positive or not.
      center = center + tf.cast(
          tf.random.uniform([], dtype=tf.float32) < false_positive_rate,
          tf.float32) * (
              add_center_gaussian(center_x, center_y, center) - center)

    center = center[center_pad_begin:(center_pad_begin + height),
                    center_pad_begin:(center_pad_begin + width)]
    center = tf.expand_dims(center, -1)
    return center, unique_ids, ids_to_center_x, ids_to_center_y
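
  # Note: with the default sigma=8, the heatmap above is padded by 25 pixels
  # at the top/left and 26 at the bottom/right, so the 51x51 Gaussian fits
  # entirely inside the padded tensor even when its clamped center lies on the
  # image border; the padding is cropped away before returning.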

  def _generate_gt_center_and_offset(self,
                                     panoptic_label,
                                     semantic_weights,
                                     prev_panoptic_label=None,
                                     next_panoptic_label=None):
    """Generates the ground-truth center and offset from the panoptic labels.

    Additionally, the per-pixel weights for the semantic branch are increased
    for small instances. If prev_panoptic_label is passed, it also computes
    the previous center heatmap with random noise and the offsets between the
    center maps.

    Args:
      panoptic_label: A tf.Tensor of shape [height, width, 1].
      semantic_weights: A tf.Tensor of shape [height, width, 1].
      prev_panoptic_label: An optional tf.Tensor of shape [height, width, 1].
      next_panoptic_label: An optional tf.Tensor of shape [height, width, 1].

    Returns:
      A tuple (center, offsets, weights, prev_center, frame_offsets,
      next_offset). `center`, `weights` and `prev_center` are tf.Tensors of
      shape [height, width, 1]; `offsets`, `frame_offsets` and `next_offset`
      are tf.Tensors of shape [height, width, 2] (y and x offsets). If
      prev_panoptic_label is None, prev_center and frame_offsets are None. If
      next_panoptic_label is None, next_offset is None.
    """
    height = tf.shape(panoptic_label)[0]
    width = tf.shape(panoptic_label)[1]

    # Pad center to make boundary handling easier.
    center_pad_begin = int(round(3 * self._sigma + 1))
    center_pad_end = int(round(3 * self._sigma + 2))
    center_pad = center_pad_begin + center_pad_end

    center = tf.zeros((height + center_pad, width + center_pad))
    offset_x = tf.zeros((height, width, 1), dtype=tf.int32)
    offset_y = tf.zeros((height, width, 1), dtype=tf.int32)
    unique_ids, _ = tf.unique(tf.reshape(panoptic_label, [-1]))

    prev_center = None
    frame_offsets = None
    # Due to loop handling in tensorflow, these variables had to be defined
    # for all cases.
    frame_offset_x = tf.zeros((height, width, 1), dtype=tf.int32)
    frame_offset_y = tf.zeros((height, width, 1), dtype=tf.int32)

    # Next-frame instance offsets.
    next_offset = None
    next_offset_y = tf.zeros((height, width, 1), dtype=tf.int32)
    next_offset_x = tf.zeros((height, width, 1), dtype=tf.int32)

    if prev_panoptic_label is not None:
      (prev_center, prev_unique_ids, prev_centers_x, prev_centers_y
      ) = self._generate_prev_centers_with_noise(prev_panoptic_label)

    for panoptic_id in unique_ids:
      semantic_id = panoptic_id // self._dataset_info['panoptic_label_divisor']
      # Filter out IDs that should be ignored, are stuff classes or crowd.
      # Stuff classes and crowd regions both have IDs of the form
      # panoptic_id = semantic_id * label_divisor.
      if (semantic_id == self._dataset_info['ignore_label'] or
          panoptic_id % self._dataset_info['panoptic_label_divisor'] == 0):
        continue
      # Convert [[y0, x0, 0], ...] to [[y0, ...], [x0, ...], [0, ...]].
      mask_index = tf.transpose(tf.where(panoptic_label == panoptic_id))
      mask_y_index = mask_index[0]
      mask_x_index = mask_index[1]

      next_mask_index = None
      next_mask_y_index = None
      next_mask_x_index = None
      if next_panoptic_label is not None:
        next_mask_index = tf.transpose(
            tf.where(next_panoptic_label == panoptic_id))
        next_mask_y_index = next_mask_index[0]
        next_mask_x_index = next_mask_index[1]

      instance_area = tf.shape(mask_x_index)
      if instance_area < self._instance_area_threshold:
        semantic_weights = tf.where(panoptic_label == panoptic_id,
                                    self._small_instance_weight,
                                    semantic_weights)

      centers = tf.reduce_mean(tf.cast(mask_index, tf.float32), axis=1)

      center_x = tf.cast(tf.round(centers[1]), tf.int32)
      center_y = tf.cast(tf.round(centers[0]), tf.int32)

      # Due to the padding with center_pad_begin in center, the computed center
      # becomes the upper left corner in the center tensor.
      upper_left = center_x, center_y
      bottom_right = (upper_left[0] + self._gaussian_size,
                      upper_left[1] + self._gaussian_size)

      indices_x, indices_y = tf.meshgrid(
          tf.range(upper_left[0], bottom_right[0]),
          tf.range(upper_left[1], bottom_right[1]))
      indices = tf.transpose(
          tf.stack([tf.reshape(indices_y, [-1]),
                    tf.reshape(indices_x, [-1])]))

      center = tf.tensor_scatter_nd_max(
          center, indices, self._gaussian, name='center_scatter')
      offset_y = tf.tensor_scatter_nd_update(
          offset_y,
          tf.transpose(mask_index),
          center_y - tf.cast(mask_y_index, tf.int32),
          name='offset_y_scatter')
      offset_x = tf.tensor_scatter_nd_update(
          offset_x,
          tf.transpose(mask_index),
          center_x - tf.cast(mask_x_index, tf.int32),
          name='offset_x_scatter')

      if prev_panoptic_label is not None:
        mask = tf.equal(prev_unique_ids, panoptic_id)
        if tf.math.count_nonzero(mask) > 0:
          prev_center_x = prev_centers_x[mask]
          prev_center_y = prev_centers_y[mask]

          frame_offset_y = tf.tensor_scatter_nd_update(
              frame_offset_y,
              tf.transpose(mask_index),
              prev_center_y - tf.cast(mask_y_index, tf.int32),
              name='frame_offset_y_scatter')
          frame_offset_x = tf.tensor_scatter_nd_update(
              frame_offset_x,
              tf.transpose(mask_index),
              prev_center_x - tf.cast(mask_x_index, tf.int32),
              name='frame_offset_x_scatter')

      if next_panoptic_label is not None:
        next_offset_y = tf.tensor_scatter_nd_update(
            next_offset_y,
            tf.transpose(next_mask_index),
            center_y - tf.cast(next_mask_y_index, tf.int32),
            name='next_offset_y_scatter')
        next_offset_x = tf.tensor_scatter_nd_update(
            next_offset_x,
            tf.transpose(next_mask_index),
            center_x - tf.cast(next_mask_x_index, tf.int32),
            name='next_offset_x_scatter')

    offset = tf.concat([offset_y, offset_x], axis=2)
    center = center[center_pad_begin:(center_pad_begin + height),
                    center_pad_begin:(center_pad_begin + width)]
    center = tf.expand_dims(center, -1)

    if prev_panoptic_label is not None:
      frame_offsets = tf.concat([frame_offset_y, frame_offset_x], axis=2)

    if next_panoptic_label is not None:
      next_offset = tf.concat([next_offset_y, next_offset_x], axis=2)

    return (center, offset, semantic_weights, prev_center, frame_offsets,
            next_offset)
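

# Minimal sanity-check sketch: this block only exercises the Gaussian helper
# above and needs no dataset; the printed values assume the default sigma=8.
if __name__ == '__main__':
  gaussian_kernel, kernel_size = _compute_gaussian_from_std(8)
  # kernel_size == 6 * 8 + 3 == 51; the peak of the kernel is exp(0) == 1.0.
  print('Gaussian kernel size:', kernel_size)
  print('Gaussian peak value:', gaussian_kernel.max())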