Spaces:

akhaliq
/

deeplab2

Runtime error

File size: 12,815 Bytes

506da10

# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This file contains functions to preprocess images and labels."""

import tensorflow as tf

from deeplab2.data.preprocessing import autoaugment_utils
from deeplab2.data.preprocessing import preprocess_utils

# The probability of flipping the images and labels
# left-right during training
_PROB_OF_FLIP = 0.5

# Per-channel pixel value that _pad_image_and_label uses as the fill value for
# the padded border of an image.
_MEAN_PIXEL = [127.5, 127.5, 127.5]


def _pad_image_and_label(image, label, offset_height, offset_width,
                         target_height, target_width, ignore_label=None):
  """Pads the image and the label to the given size.

  Args:
    image: A tf.Tensor of shape [height, width, channels].
    label: A tf.Tensor of shape [height, width, 1] or None.
    offset_height: The number of rows of zeros to add on top of the image and
      label.
    offset_width: The number of columns of zeros to add on the left of the image
      and label.
    target_height: The total height after padding.
    target_width: The total width after padding.
    ignore_label: The ignore_label for the label. Must only be set when label is
      given.

  Returns:
    The padded image and label as a tuple (padded_image, padded_label).

  Raises:
    tf.errors.InvalidArgumentError: An error occurs if the padding configuration
      is invalid.
    ValueError: An error occurs if label is given without an ignore_label.
  """
  height = tf.shape(image)[0]
  width = tf.shape(image)[1]
  original_dtype = image.dtype
  if original_dtype not in (tf.float32, tf.float64):
    image = tf.cast(image, tf.float32)

  bottom_padding = target_height - offset_height - height
  right_padding = target_width - offset_width - width

  assert_bottom_padding = tf.assert_greater(
      bottom_padding, -1,
      'The padding configuration is not valid. Please either increase the '
      'target size or reduce the padding offset.')
  assert_right_padding = tf.assert_greater(
      right_padding, -1, 'The padding configuration is not valid. Please either'
      ' increase the target size or reduce the padding offset.')
  with tf.control_dependencies([assert_bottom_padding, assert_right_padding]):
    paddings = [[offset_height, bottom_padding], [offset_width, right_padding],
                [0, 0]]

    image = image - _MEAN_PIXEL
    image = tf.pad(image, paddings)
    image = image + _MEAN_PIXEL
    image = tf.cast(image, original_dtype)

    if label is not None:
      if ignore_label is None:
        raise ValueError(
            'If a label is given, the ignore label must be set too.')
      label = tf.pad(label, paddings, constant_values=ignore_label)

    return image, label


def _update_max_resize_value(max_resize_value, crop_size, is_inference=False):
  """Normalizes max_resize_value and validates it against the crop size.

  Args:
    max_resize_value: A 2-tuple of (height, width), maximum allowed value
      after resize. If a single element is given, then height and width
      share the same value. None, empty or having 0 indicates no maximum value
      will be used.
    crop_size: A 2-tuple of (height, width), crop size used.
    is_inference: Boolean, whether the model is performing inference or not.

  Returns:
    The normalized max_resize_value, or None if no maximum applies. During
    inference, crop_size is returned when no maximum was provided.

  Raises:
    ValueError: If the maximum resize value exceeds the crop size in either
      dimension.
  """
  limit = preprocess_utils.process_resize_value(max_resize_value)

  # Without an explicit limit, inference falls back to the crop size so that
  # larger input images are resized to fit the model.
  if is_inference and limit is None:
    limit = crop_size

  if limit is None:
    return None

  # Check the height first, then the width.
  if any(limit[axis] > crop_size[axis] for axis in (0, 1)):
    raise ValueError(
        'Maximum resize value provided (%s) exceeds model crop size (%s)' %
        (limit, crop_size))
  return limit


def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               prev_image=None,
                               prev_label=None,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=None,
                               is_training=True,
                               autoaugment_policy_name=None):
  """Preprocesses the image and label.

  When not training, the inputs are resized and then padded at the bottom and
  right to [crop_height, crop_width]. When training, the inputs are optionally
  resized, randomly scaled, optionally distorted with autoaugment, padded to at
  least the crop size, randomly cropped to [crop_height, crop_width] and
  randomly flipped left-right.

  Args:
    image: A tf.Tensor containing the image with shape [height, width, 3].
    label: A tf.Tensor containing the label with shape [height, width, 1] or
      None.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    prev_image: An optional tensor of shape [image_height, image_width, 3].
    prev_label: An optional tensor of shape [label_height, label_width, 1].
    min_resize_value: A 2-tuple of (height, width), desired minimum value
      after resize. If a single element is given, then height and width share
      the same value. None, empty or having 0 indicates no minimum value will
      be used.
    max_resize_value: A 2-tuple of (height, width), maximum allowed value
      after resize. If a single element is given, then height and width
      share the same value. None, empty or having 0 indicates no maximum value
      will be used.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor for random scale augmentation.
    max_scale_factor: Maximum scale factor for random scale augmentation.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    autoaugment_policy_name: String, autoaugment policy name. See
        autoaugment_policy.py for available policies.

  Returns:
    A 5-tuple (resized_image, processed_image, label, processed_prev_image,
    prev_label):
    resized_image: The input image after the optional resizing step, without
      other augmentations, as a tf.Tensor.
    processed_image: The preprocessed image as a tf.Tensor.
    label: The preprocessed groundtruth segmentation label as a tf.Tensor, or
      None if no label was given.
    processed_prev_image: The preprocessed previous image as a tf.Tensor, or
      None if prev_image was not given.
    prev_label: The preprocessed previous label as a tf.Tensor, or None if
      prev_label was not given.

  Raises:
    ValueError: Ground truth label not provided during training, the image or
      label has an incompatible static shape, or the maximum resize value
      exceeds the crop size.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')

  image.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 3]))

  # Keep reference to original image.
  resized_image = image
  # The current and previous frames are stacked along the channel axis so that
  # the resizing below is applied to both identically; they are split again
  # afterwards.
  if prev_image is not None:
    image = tf.concat([image, prev_image], axis=2)
  processed_image = tf.cast(image, tf.float32)
  processed_prev_image = None

  if label is not None:
    label.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 1]))
    if prev_label is not None:
      label = tf.concat([label, prev_label], axis=2)
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if any([min_resize_value, max_resize_value, not is_training]):
    max_resize_value = _update_max_resize_value(
        max_resize_value,
        crop_size=(crop_height, crop_width),
        is_inference=not is_training)

    processed_image, label = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    if prev_image is None:
      resized_image = tf.identity(processed_image)
    else:
      # Only the current frame (first half of the channels) is reported as the
      # resized image.
      resized_image, _ = tf.split(processed_image, 2, axis=2)

  # Undo the channel-wise stacking of current and previous inputs.
  if prev_image is not None:
    processed_image, processed_prev_image = tf.split(processed_image, 2, axis=2)

  if prev_label is not None:
    label, prev_label = tf.split(label, 2, axis=2)

  if not is_training:
    # Evaluation: no augmentation, only pad at the bottom/right up to the crop
    # size so that the outputs have a static shape.
    offset_height = 0
    offset_width = 0
    processed_image, label = _pad_image_and_label(processed_image, label,
                                                  offset_height, offset_width,
                                                  crop_height, crop_width,
                                                  ignore_label)
    processed_image.set_shape([crop_height, crop_width, 3])
    if label is not None:
      label.set_shape([crop_height, crop_width, 1])
    if prev_image is not None:
      processed_prev_image, prev_label = _pad_image_and_label(
          processed_prev_image, prev_label, offset_height, offset_width,
          crop_height, crop_width, ignore_label)
      processed_prev_image.set_shape([crop_height, crop_width, 3])
      if prev_label is not None:
        prev_label.set_shape([crop_height, crop_width, 1])
    return (resized_image, processed_image, label, processed_prev_image,
            prev_label)

  # Data augmentation by randomly scaling the inputs. The same scale is used
  # for the current and the previous inputs.
  scale = preprocess_utils.get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
      processed_image, label, scale)
  if processed_prev_image is not None:
    (processed_prev_image,
     prev_label) = preprocess_utils.randomly_scale_image_and_label(
         processed_prev_image, prev_label, scale)

  # Apply autoaugment if any.
  if autoaugment_policy_name:
    processed_image, label = _autoaugment_helper(
        processed_image, label, ignore_label, autoaugment_policy_name)
    if processed_prev_image is not None:
      processed_prev_image, prev_label = _autoaugment_helper(
          processed_prev_image, prev_label, ignore_label,
          autoaugment_policy_name)

  # Pad image and label to have dimensions >= [crop_height, crop_width].
  image_height = tf.shape(processed_image)[0]
  image_width = tf.shape(processed_image)[1]
  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Randomly crop the image and label.
  def _uniform_offset(margin):
    # maxval is exclusive and must be positive, hence the lower bound of 1;
    # a non-positive margin therefore always yields an offset of 0.
    return tf.random.uniform(
        [], minval=0, maxval=tf.maximum(margin, 1), dtype=tf.int32)

  # The same offsets are applied to the current and the previous inputs.
  offset_height = _uniform_offset(crop_height - image_height)
  offset_width = _uniform_offset(crop_width - image_width)
  processed_image, label = _pad_image_and_label(processed_image, label,
                                                offset_height, offset_width,
                                                target_height, target_width,
                                                ignore_label)
  if processed_prev_image is not None:
    processed_prev_image, prev_label = _pad_image_and_label(
        processed_prev_image, prev_label, offset_height, offset_width,
        target_height, target_width, ignore_label)

  if processed_prev_image is not None:
    # All four tensors go through one random_crop / flip_dim call.
    (processed_image, label, processed_prev_image,
     prev_label) = preprocess_utils.random_crop(
         [processed_image, label, processed_prev_image, prev_label],
         crop_height, crop_width)
    # Randomly left-right flip the image and label.
    (processed_image, label, processed_prev_image, prev_label,
     _) = preprocess_utils.flip_dim(
         [processed_image, label, processed_prev_image, prev_label],
         _PROB_OF_FLIP,
         dim=1)
  else:
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return resized_image, processed_image, label, processed_prev_image, prev_label


def _autoaugment_helper(image, label, ignore_label, policy_name):
  """Applies an autoaugment policy to an image/label pair.

  The image is cast to uint8 and the label to int32 before both are passed to
  autoaugment_utils.distort_image_with_autoaugment. The distorted image is cast
  to float32 before it is returned.

  Args:
    image: The image tensor to augment.
    label: The label tensor to augment together with the image.
    ignore_label: The ignore label, forwarded to the autoaugment utility.
    policy_name: The autoaugment policy name, forwarded to the autoaugment
      utility.

  Returns:
    A tuple (image, label) holding the augmented image as float32 and the
    augmented label.
  """
  augmented_image, augmented_label = (
      autoaugment_utils.distort_image_with_autoaugment(
          tf.cast(image, tf.uint8), tf.cast(label, tf.int32), ignore_label,
          policy_name))
  return tf.cast(augmented_image, tf.float32), augmented_label