File size: 12,815 Bytes
506da10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This file contains functions to preprocess images and labels."""

import tensorflow as tf

from deeplab2.data.preprocessing import autoaugment_utils
from deeplab2.data.preprocessing import preprocess_utils

# The probability of flipping the images and labels
# left-right during training
_PROB_OF_FLIP = 0.5

# Per-channel pixel value used when padding images: _pad_image_and_label
# subtracts it before zero-padding and adds it back afterwards, so the padded
# image regions end up holding this value instead of zero.
_MEAN_PIXEL = [127.5, 127.5, 127.5]


def _pad_image_and_label(image, label, offset_height, offset_width,
                         target_height, target_width, ignore_label=None):
  """Pads the image and the label to the given size.

  The image is padded with the mean pixel value (_MEAN_PIXEL) and the label,
  if given, is padded with ignore_label.

  Args:
    image: A tf.Tensor of shape [height, width, channels].
    label: A tf.Tensor of shape [height, width, 1] or None.
    offset_height: The number of padding rows to add on top of the image and
      label.
    offset_width: The number of padding columns to add on the left of the image
      and label.
    target_height: The total height after padding.
    target_width: The total width after padding.
    ignore_label: The value used to pad the label. Must be set whenever a label
      is given; it is not used when label is None.

  Returns:
    The padded image and label as a tuple (padded_image, padded_label). The
    padded image keeps the dtype of the input image, and padded_label is None
    when no label was given.

  Raises:
    tf.errors.InvalidArgumentError: An error occurs if the padding configuration
      is invalid.
    ValueError: An error occurs if label is given without an ignore_label.
  """
  # Validate the plain Python arguments up front so that an invalid call fails
  # before any op has been created.
  if label is not None and ignore_label is None:
    raise ValueError(
        'If a label is given, the ignore label must be set too.')

  height = tf.shape(image)[0]
  width = tf.shape(image)[1]
  original_dtype = image.dtype
  # The mean subtraction below needs a floating point image.
  if original_dtype not in (tf.float32, tf.float64):
    image = tf.cast(image, tf.float32)

  bottom_padding = target_height - offset_height - height
  right_padding = target_width - offset_width - width

  # Both remaining paddings must be non-negative (i.e., greater than -1).
  assert_bottom_padding = tf.assert_greater(
      bottom_padding, -1,
      'The padding configuration is not valid. Please either increase the '
      'target size or reduce the padding offset.')
  assert_right_padding = tf.assert_greater(
      right_padding, -1, 'The padding configuration is not valid. Please either'
      ' increase the target size or reduce the padding offset.')
  with tf.control_dependencies([assert_bottom_padding, assert_right_padding]):
    paddings = [[offset_height, bottom_padding], [offset_width, right_padding],
                [0, 0]]

    # Shift the image by the mean pixel so that the zeros inserted by tf.pad
    # become the mean pixel value once the shift is undone.
    image = image - _MEAN_PIXEL
    image = tf.pad(image, paddings)
    image = image + _MEAN_PIXEL
    image = tf.cast(image, original_dtype)

    if label is not None:
      label = tf.pad(label, paddings, constant_values=ignore_label)

    return image, label


def _update_max_resize_value(max_resize_value, crop_size, is_inference=False):
  """Validates max_resize_value against the crop size, defaulting it if unset.

  Args:
    max_resize_value: A 2-tuple of (height, width), maximum allowed value
      after resize. If a single element is given, then height and width
      share the same value. None, empty or having 0 indicates no maximum value
      will be used.
    crop_size: A 2-tuple of (height, width), crop size used.
    is_inference: Boolean, whether the model is performing inference or not.

  Returns:
    The normalized max_resize_value, the crop size when no maximum was given
    during inference, or None when no maximum applies.

  Raises:
    ValueError: If the maximum resize value exceeds the crop size in height or
      in width.
  """
  resize_limit = preprocess_utils.process_resize_value(max_resize_value)

  if resize_limit is None:
    if not is_inference:
      # No limit requested and none needed outside of inference.
      return None
    # During inference, fall back to the crop size so that the model can take
    # input images with larger sizes.
    resize_limit = crop_size

  crop_limit_height = crop_size[0]
  if resize_limit[0] > crop_limit_height or resize_limit[1] > crop_size[1]:
    raise ValueError(
        'Maximum resize value provided (%s) exceeds model crop size (%s)' %
        (resize_limit, crop_size))

  return resize_limit


def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               prev_image=None,
                               prev_label=None,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=None,
                               is_training=True,
                               autoaugment_policy_name=None):
  """Preprocesses the image and label.

  The inputs are optionally resized to the requested range first. When not
  training, they are then padded at the bottom and right to
  [crop_height, crop_width] and returned. When training, they are randomly
  scaled, optionally distorted with autoaugment, padded to at least the crop
  size, randomly cropped to [crop_height, crop_width] and randomly flipped
  left-right. If a previous frame is given, it receives the same scale, padding
  offsets, crop and flip as the current frame.

  Args:
    image: A tf.Tensor containing the image with shape [height, width, 3].
    label: A tf.Tensor containing the label with shape [height, width, 1] or
      None.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    prev_image: An optional tensor of shape [image_height, image_width, 3].
    prev_label: An optional tensor of shape [label_height, label_width, 1].
    min_resize_value: A 2-tuple of (height, width), desired minimum value
      after resize. If a single element is given, then height and width share
      the same value. None, empty or having 0 indicates no minimum value will
      be used.
    max_resize_value: A 2-tuple of (height, width), maximum allowed value
      after resize. If a single element is given, then height and width
      share the same value. None, empty or having 0 indicates no maximum value
      will be used.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor for random scale augmentation.
    max_scale_factor: Maximum scale factor for random scale augmentation.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    autoaugment_policy_name: String, autoaugment policy name. See
        autoaugment_policy.py for available policies.

  Returns:
    A 5-tuple (resized_image, processed_image, label, processed_prev_image,
    prev_label) with:
      resized_image: The resized input image without other augmentations as a
        tf.Tensor. If no resizing is performed, this is the input image as
        given.
      processed_image: The preprocessed image as a tf.Tensor.
      label: The preprocessed groundtruth segmentation label as a tf.Tensor, or
        None if no label was given.
      processed_prev_image: The preprocessed previous image as a tf.Tensor, or
        None if prev_image was not given.
      prev_label: The preprocessed previous label as a tf.Tensor, or None if
        prev_label was not given.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')

  image.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 3]))

  # Keep reference to original image.
  resized_image = image
  # Stack the previous frame along the channel axis so that both frames are
  # resized together; they are split apart again below.
  if prev_image is not None:
    image = tf.concat([image, prev_image], axis=2)
  processed_image = tf.cast(image, tf.float32)
  processed_prev_image = None

  if label is not None:
    label.get_shape().assert_is_compatible_with(tf.TensorShape([None, None, 1]))
    if prev_label is not None:
      label = tf.concat([label, prev_label], axis=2)
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if any([min_resize_value, max_resize_value, not is_training]):
    max_resize_value = _update_max_resize_value(
        max_resize_value,
        crop_size=(crop_height, crop_width),
        is_inference=not is_training)

    processed_image, label = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    if prev_image is None:
      resized_image = tf.identity(processed_image)
    else:
      # Only the current frame is reported as the resized image.
      resized_image, _ = tf.split(processed_image, 2, axis=2)

  # Undo the channel-wise stacking of the current and the previous frame.
  if prev_image is not None:
    processed_image, processed_prev_image = tf.split(processed_image, 2, axis=2)

  if prev_label is not None:
    label, prev_label = tf.split(label, 2, axis=2)

  if not is_training:
    # No augmentation outside of training: pad only at the bottom and right
    # (zero offsets) up to the crop size.
    offset_height = 0
    offset_width = 0
    processed_image, label = _pad_image_and_label(processed_image, label,
                                                  offset_height, offset_width,
                                                  crop_height, crop_width,
                                                  ignore_label)
    processed_image.set_shape([crop_height, crop_width, 3])
    if label is not None:
      label.set_shape([crop_height, crop_width, 1])
    if prev_image is not None:
      processed_prev_image, prev_label = _pad_image_and_label(
          processed_prev_image, prev_label, offset_height, offset_width,
          crop_height, crop_width, ignore_label)
      processed_prev_image.set_shape([crop_height, crop_width, 3])
      if prev_label is not None:
        prev_label.set_shape([crop_height, crop_width, 1])
    return (resized_image, processed_image, label, processed_prev_image,
            prev_label)

  # Data augmentation by randomly scaling the inputs. The same scale is used
  # for the current and the previous frame.
  scale = preprocess_utils.get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
      processed_image, label, scale)
  if processed_prev_image is not None:
    (processed_prev_image,
     prev_label) = preprocess_utils.randomly_scale_image_and_label(
         processed_prev_image, prev_label, scale)

  # Apply autoaugment if any.
  if autoaugment_policy_name:
    processed_image, label = _autoaugment_helper(
        processed_image, label, ignore_label, autoaugment_policy_name)
    if processed_prev_image is not None:
      processed_prev_image, prev_label = _autoaugment_helper(
          processed_prev_image, prev_label, ignore_label,
          autoaugment_policy_name)

  # Pad image and label to have dimensions >= [crop_height, crop_width].
  image_height = tf.shape(processed_image)[0]
  image_width = tf.shape(processed_image)[1]
  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Randomly place the image inside the padded canvas. The offset is drawn
  # from [0, margin), or is always 0 if the image already covers the crop size.
  def _uniform_offset(margin):
    return tf.random.uniform(
        [], minval=0, maxval=tf.maximum(margin, 1), dtype=tf.int32)

  offset_height = _uniform_offset(crop_height - image_height)
  offset_width = _uniform_offset(crop_width - image_width)
  processed_image, label = _pad_image_and_label(processed_image, label,
                                                offset_height, offset_width,
                                                target_height, target_width,
                                                ignore_label)
  if processed_prev_image is not None:
    processed_prev_image, prev_label = _pad_image_and_label(
        processed_prev_image, prev_label, offset_height, offset_width,
        target_height, target_width, ignore_label)

  if processed_prev_image is not None:
    # Randomly crop the images and labels of both frames together.
    (processed_image, label, processed_prev_image,
     prev_label) = preprocess_utils.random_crop(
         [processed_image, label, processed_prev_image, prev_label],
         crop_height, crop_width)
    # Randomly left-right flip the image and label.
    (processed_image, label, processed_prev_image, prev_label,
     _) = preprocess_utils.flip_dim(
         [processed_image, label, processed_prev_image, prev_label],
         _PROB_OF_FLIP,
         dim=1)
  else:
    # Randomly crop the image and label.
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return resized_image, processed_image, label, processed_prev_image, prev_label


def _autoaugment_helper(image, label, ignore_label, policy_name):
  """Applies an autoaugment policy to an image and its label.

  The image is cast to uint8 and the label to int32 before they are handed to
  autoaugment_utils.distort_image_with_autoaugment; the distorted image is cast
  to float32 afterwards.

  Args:
    image: A tf.Tensor containing the image.
    label: A tf.Tensor containing the label.
    ignore_label: The ignore label, forwarded to the autoaugment utility.
    policy_name: String, the autoaugment policy name.

  Returns:
    A tuple (image, label) with the distorted float32 image and the distorted
    label.
  """
  distorted_image, distorted_label = (
      autoaugment_utils.distort_image_with_autoaugment(
          tf.cast(image, tf.uint8), tf.cast(label, tf.int32), ignore_label,
          policy_name))
  return tf.cast(distorted_image, tf.float32), distorted_label