Spaces:

akhaliq
/

deeplab2

Runtime error

deeplab2 / data /preprocessing /autoaugment_utils.py

akhaliq3

spaces demo

506da10 almost 4 years ago

14.7 kB

	# coding=utf-8
	# Copyright 2021 The Deeplab2 Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""AutoAugment utility file.

	Please cite or refer to the following papers:
	- Ekin D Cubuk, Barret Zoph, Dandelion Mane, Vijay Vasudevan, and Quoc V Le.
	"Autoaugment: Learning augmentation policies from data." In CVPR, 2019.

	- Ekin D Cubuk, Barret Zoph, Jonathon Shlens, and Quoc V Le.
	"Randaugment: Practical automated data augmentation with a reduced search
	space." In CVPR, 2020.
	"""

	import inspect

	import tensorflow as tf

	from deeplab2.data.preprocessing import autoaugment_policy


	# This signifies the max integer that the controller RNN could predict for the
	# augmentation scheme.
	_MAX_LEVEL = 10.


	def blend(image1, image2, factor):
	"""Blends image1 and image2 using 'factor'.

	Factor can be above 0.0. A value of 0.0 means only image1 is used.
	A value of 1.0 means only image2 is used. A value between 0.0 and
	1.0 means we linearly interpolate the pixel values between the two
	images. A value greater than 1.0 "extrapolates" the difference
	between the two pixel values, and we clip the results to values
	between 0 and 255.

	Args:
	image1: An image Tensor of type uint8.
	image2: An image Tensor of type uint8.
	factor: A floating point value above 0.0.

	Returns:
	A blended image Tensor of type uint8.
	"""
	if factor == 0.0:
	return tf.convert_to_tensor(image1)
	if factor == 1.0:
	return tf.convert_to_tensor(image2)

	image1 = tf.cast(image1, tf.float32)
	image2 = tf.cast(image2, tf.float32)

	difference = image2 - image1
	scaled = factor * difference

	# Do addition in float.
	temp = tf.cast(image1, tf.float32) + scaled

	# Interpolate
	if factor > 0.0 and factor < 1.0:
	# Interpolation means we always stay within 0 and 255.
	return tf.cast(temp, tf.uint8)

	# Extrapolate:
	#
	# We need to clip and then cast.
	return tf.cast(tf.clip_by_value(temp, 0.0, 255.0), tf.uint8)


	def solarize(image, threshold=128):
	# For each pixel in the image, select the pixel
	# if the value is less than the threshold.
	# Otherwise, subtract 255 from the pixel.
	return tf.where(image < threshold, image, 255 - image)


	def invert(image):
	"""Inverts the image pixels."""
	image = tf.convert_to_tensor(image)
	return 255 - image


	def color(image, factor):
	"""Equivalent of PIL Color."""
	degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
	return blend(degenerate, image, factor)


	def contrast(image, factor):
	"""Equivalent of PIL Contrast."""
	degenerate = tf.image.rgb_to_grayscale(image)
	# Cast before calling tf.histogram.
	degenerate = tf.cast(degenerate, tf.int32)

	# Compute the grayscale histogram, then compute the mean pixel value,
	# and create a constant image size of that value. Use that as the
	# blending degenerate target of the original image.
	hist = tf.histogram_fixed_width(degenerate, [0, 255], nbins=256)
	mean = tf.reduce_sum(tf.cast(hist, tf.float32)) / 256.0
	degenerate = tf.ones_like(degenerate, dtype=tf.float32) * mean
	degenerate = tf.clip_by_value(degenerate, 0.0, 255.0)
	degenerate = tf.image.grayscale_to_rgb(tf.cast(degenerate, tf.uint8))
	return blend(degenerate, image, factor)


	def brightness(image, factor):
	"""Equivalent of PIL Brightness."""
	degenerate = tf.zeros_like(image)
	return blend(degenerate, image, factor)


	def posterize(image, bits):
	"""Equivalent of PIL Posterize."""
	shift = 8 - bits
	return tf.bitwise.left_shift(tf.bitwise.right_shift(image, shift), shift)


	def autocontrast(image):
	"""Implements Autocontrast function from PIL using TF ops.

	Args:
	image: A 3D uint8 tensor.

	Returns:
	The image after it has had autocontrast applied to it and will be of type
	uint8.
	"""

	def scale_channel(image):
	"""Scale the 2D image using the autocontrast rule."""
	# A possibly cheaper version can be done using cumsum/unique_with_counts
	# over the histogram values, rather than iterating over the entire image.
	# to compute mins and maxes.
	lo = tf.cast(tf.reduce_min(image), tf.float32)
	hi = tf.cast(tf.reduce_max(image), tf.float32)

	# Scale the image, making the lowest value 0 and the highest value 255.
	def scale_values(im):
	scale = 255.0 / (hi - lo)
	offset = -lo * scale
	im = tf.cast(im, tf.float32) * scale + offset
	im = tf.clip_by_value(im, 0.0, 255.0)
	return tf.cast(im, tf.uint8)

	result = tf.cond(hi > lo, lambda: scale_values(image), lambda: image)
	return result

	# Assumes RGB for now. Scales each channel independently
	# and then stacks the result.
	s1 = scale_channel(image[:, :, 0])
	s2 = scale_channel(image[:, :, 1])
	s3 = scale_channel(image[:, :, 2])
	image = tf.stack([s1, s2, s3], 2)
	return image


	def sharpness(image, factor):
	"""Implements Sharpness function from PIL using TF ops."""
	orig_image = image
	image = tf.cast(image, tf.float32)
	# Make image 4D for conv operation.
	image = tf.expand_dims(image, 0)
	# SMOOTH PIL Kernel.
	kernel = tf.constant(
	[[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=tf.float32,
	shape=[3, 3, 1, 1]) / 13.
	# Tile across channel dimension.
	kernel = tf.tile(kernel, [1, 1, 3, 1])
	strides = [1, 1, 1, 1]
	degenerate = tf.nn.depthwise_conv2d(
	image, kernel, strides, padding='VALID', dilations=[1, 1])
	degenerate = tf.clip_by_value(degenerate, 0.0, 255.0)
	degenerate = tf.squeeze(tf.cast(degenerate, tf.uint8), [0])

	# For the borders of the resulting image, fill in the values of the
	# original image.
	mask = tf.ones_like(degenerate)
	padded_mask = tf.pad(mask, [[1, 1], [1, 1], [0, 0]])
	padded_degenerate = tf.pad(degenerate, [[1, 1], [1, 1], [0, 0]])
	result = tf.where(tf.equal(padded_mask, 1), padded_degenerate, orig_image)

	# Blend the final result.
	return blend(result, orig_image, factor)


	def equalize(image):
	"""Implements Equalize function from PIL using TF ops."""
	def scale_channel(im, c):
	"""Scale the data in the channel to implement equalize."""
	im = tf.cast(im[:, :, c], tf.int32)
	# Compute the histogram of the image channel.
	histo = tf.histogram_fixed_width(im, [0, 255], nbins=256)

	# For the purposes of computing the step, filter out the nonzeros.
	nonzero = tf.where(tf.not_equal(histo, 0))
	nonzero_histo = tf.reshape(tf.gather(histo, nonzero), [-1])
	step = (tf.reduce_sum(nonzero_histo) - nonzero_histo[-1]) // 255

	def build_lut(histo, step):
	# Compute the cumulative sum, shifting by step // 2
	# and then normalization by step.
	lut = (tf.cumsum(histo) + (step // 2)) // step
	# Shift lut, prepending with 0.
	lut = tf.concat([[0], lut[:-1]], 0)
	# Clip the counts to be in range. This is done
	# in the C code for image.point.
	return tf.clip_by_value(lut, 0, 255)

	# If step is zero, return the original image. Otherwise, build
	# lut from the full histogram and step and then index from it.
	result = tf.cond(tf.equal(step, 0),
	lambda: im,
	lambda: tf.gather(build_lut(histo, step), im))

	return tf.cast(result, tf.uint8)

	# Assumes RGB for now. Scales each channel independently
	# and then stacks the result.
	s1 = scale_channel(image, 0)
	s2 = scale_channel(image, 1)
	s3 = scale_channel(image, 2)
	image = tf.stack([s1, s2, s3], 2)
	return image


	NAME_TO_FUNC = {
	'AutoContrast': autocontrast,
	'Equalize': equalize,
	'Invert': invert,
	'Posterize': posterize,
	'Solarize': solarize,
	'Color': color,
	'Contrast': contrast,
	'Brightness': brightness,
	'Sharpness': sharpness,
	}


	def _enhance_level_to_arg(level):
	return ((level/_MAX_LEVEL) * 1.8 + 0.1,)


	def level_to_arg():
	return {
	'AutoContrast':
	lambda level: (),
	'Equalize':
	lambda level: (),
	'Invert':
	lambda level: (),
	'Posterize': lambda level: (int((level/_MAX_LEVEL) * 4),),
	'Solarize': lambda level: (int((level/_MAX_LEVEL) * 256),),
	'Color':
	_enhance_level_to_arg,
	'Contrast':
	_enhance_level_to_arg,
	'Brightness':
	_enhance_level_to_arg,
	'Sharpness':
	_enhance_level_to_arg,
	}


	def label_wrapper(func):
	"""Adds a label function argument to func and returns unchanged label."""
	def wrapper(images, label, args, *kwargs):
	return func(images, args, *kwargs), label
	return wrapper


	def _parse_policy_info(name, prob, level, replace_value, ignore_label):
	"""Returns the function corresponding to `name` and update `level` param."""
	func = NAME_TO_FUNC[name]
	args = level_to_arg()[name](level)

	if 'prob' in inspect.getfullargspec(func)[0]:
	args = tuple([prob] + list(args))

	# Add in replace arg if it is required for the function that is being called.
	if 'replace' in inspect.getfullargspec(func)[0]:
	# Make sure ignore_label is also in the argument.
	assert 'ignore_label' in inspect.getfullargspec(func)[0]
	# Make sure replace is the second from last argument
	assert 'replace' == inspect.getfullargspec(func)[0][-2]
	# Make sure ignore_label is the final argument
	assert 'ignore_label' == inspect.getfullargspec(func)[0][-1]
	args = tuple(list(args) + [replace_value, ignore_label])

	# Add label as the second positional argument for the function if it does
	# not already exist.
	if 'label' not in inspect.getfullargspec(func)[0]:
	func = label_wrapper(func)
	return (func, prob, args)


	def _apply_func_with_prob(func, image, args, prob, label):
	"""Apply `func` to image w/ `args` as input with probability `prob`."""
	assert isinstance(args, tuple)
	assert 'label' == inspect.getfullargspec(func)[0][1]

	# If prob is a function argument, then this randomness is being handled
	# inside the function, so make sure it is always called.
	if 'prob' in inspect.getfullargspec(func)[0]:
	prob = 1.0

	# Apply the function with probability `prob`.
	should_apply_op = tf.cast(
	tf.floor(tf.random.uniform([], dtype=tf.float32) + prob), tf.bool)
	augmented_image, augmented_label = tf.cond(
	should_apply_op,
	lambda: func(image, label, *args),
	lambda: (image, label))
	return augmented_image, augmented_label


	def select_and_apply_random_policy(policies, image, label):
	"""Select a random policy from `policies` and apply it to `image`."""
	policy_to_select = tf.random.uniform([], maxval=len(policies), dtype=tf.int32)
	# Note that using tf.case instead of tf.conds would result in significantly
	# larger graphs and would even break export for some larger policies.
	for (i, policy) in enumerate(policies):
	image, label = tf.cond(
	tf.equal(i, policy_to_select),
	lambda selected_policy=policy: selected_policy(image, label),
	lambda: (image, label))
	return (image, label)


	def build_and_apply_autoaugment_policy(policies, image, label, ignore_label):
	"""Builds a policy from the given policies passed in and applies to image.

	Args:
	policies: list of lists of tuples in the form `(func, prob, level)`, `func`
	is a string name of the augmentation function, `prob` is the probability
	of applying the `func` operation, `level` is the input argument for
	`func`.
	image: tf.Tensor that the resulting policy will be applied to.
	label: tf.Tensor that the resulting policy will be applied to.
	ignore_label: The label value which will be ignored for training and
	evaluation.

	Returns:
	A version of image that now has data augmentation applied to it based on
	the `policies` pass into the function. Additionally, returns bboxes if
	a value for them is passed in that is not None
	"""
	replace_value = [128, 128, 128]

	# func is the string name of the augmentation function, prob is the
	# probability of applying the operation and level is the parameter associated
	# with the tf op.

	# tf_policies are functions that take in an image and return an augmented
	# image.
	tf_policies = []
	for policy in policies:
	tf_policy = []
	# Link string name to the correct python function and make sure the correct
	# argument is passed into that function.
	for policy_info in policy:
	policy_info = (
	list(policy_info) + [replace_value, ignore_label])

	tf_policy.append(_parse_policy_info(*policy_info))
	# Now build the tf policy that will apply the augmentation procedue
	# on image.
	def make_final_policy(tf_policy_):
	def final_policy(image_, label_):
	for func, prob, args in tf_policy_:
	image_, label_ = _apply_func_with_prob(
	func, image_, args, prob, label_)
	return image_, label_
	return final_policy
	tf_policies.append(make_final_policy(tf_policy))

	augmented_images, augmented_label = select_and_apply_random_policy(
	tf_policies, image, label)
	# If no bounding boxes were specified, then just return the images.
	return (augmented_images, augmented_label)


	def distort_image_with_autoaugment(image,
	label,
	ignore_label,
	augmentation_name=None):
	"""Applies the AutoAugment policy to `image` and `label`.

	Args:
	image: `Tensor` of shape [height, width, 3] representing an image.
	label: `Tensor` of shape [height, width, 1] representing a label.
	ignore_label: The label value which will be ignored for training and
	evaluation.
	augmentation_name: The name of the AutoAugment policy to use. See
	autoaugment_policy.py for available_policies.

	Returns:
	A tuple containing the augmented versions of `image` and `label`.

	Raises:
	ValueError: If the augmentation_name is not in available_policies.
	"""
	if augmentation_name:
	available_policies = autoaugment_policy.available_policies
	if augmentation_name not in available_policies:
	raise ValueError(
	'Invalid augmentation_name: {}'.format(augmentation_name))
	policy = available_policies[augmentation_name]
	return build_and_apply_autoaugment_policy(
	policy, image, label, ignore_label)
	return image, label