import random

import cv2
import numpy as np
from PIL import Image
from torchvision import transforms

# My libs
import spiga.data.loaders.augmentors.utils as dlu

class HorizontalFlipAug:
    def __init__(self, ldm_flip_order, prob=0.5):
        self.prob = prob
        self.ldm_flip_order = ldm_flip_order

    def __call__(self, sample):
        img = sample['image']
        landmarks = sample['landmarks']
        mask = sample['mask_ldm']
        vis = sample['visible']
        bbox = sample['bbox']

        if random.random() < self.prob:
            new_img = transforms.functional.hflip(img)
            # Swap left/right landmark indices, then mirror x: x -> width - x
            lm_new_order = self.ldm_flip_order
            new_landmarks = landmarks[lm_new_order]
            new_landmarks = (new_landmarks - (img.size[0], 0)) * (-1, 1)
            new_mask = mask[lm_new_order]
            new_vis = vis[lm_new_order]

            x, y, w, h = bbox
            new_x = img.size[0] - x - w
            new_bbox = np.array((new_x, y, w, h))

            sample['image'] = new_img
            sample['landmarks'] = new_landmarks
            sample['mask_ldm'] = new_mask
            sample['visible'] = new_vis
            sample['bbox'] = new_bbox
        return sample
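
# Usage sketch for HorizontalFlipAug (not part of the original module). The
# 4-landmark layout below, with (0, 1) and (2, 3) as left/right pairs, is
# hypothetical; real datasets supply their own ldm_flip_order tables.
def _example_hflip():
    sample = {'image': Image.new('RGB', (100, 100)),
              'landmarks': np.array([[10., 20.], [90., 20.], [30., 60.], [70., 60.]]),
              'mask_ldm': np.ones(4),
              'visible': np.ones(4),
              'bbox': np.array([10., 10., 80., 80.])}
    aug = HorizontalFlipAug(ldm_flip_order=[1, 0, 3, 2], prob=1.0)  # prob=1 forces the flip
    return aug(sample)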

class GeometryBaseAug:
    def __call__(self, sample):
        raise NotImplementedError('Inheritance __call__ not defined')

    def map_affine_transformation(self, sample, affine_transf, new_size=None):
        sample['image'] = self._image_affine_trans(sample['image'], affine_transf, new_size)
        sample['bbox'] = self._bbox_affine_trans(sample['bbox'], affine_transf)
        if 'landmarks' in sample.keys():
            sample['landmarks'] = self._landmarks_affine_trans(sample['landmarks'], affine_transf)
        return sample

    def clean_outbbox_landmarks(self, shape, landmarks, mask):
        # shape is (x, y, w, h); landmarks outside it are masked out and zeroed
        filter_x1 = landmarks[:, 0] >= shape[0]
        filter_x2 = landmarks[:, 0] < (shape[0] + shape[2])
        filter_x = np.logical_and(filter_x1, filter_x2)
        filter_y1 = landmarks[:, 1] >= shape[1]
        filter_y2 = landmarks[:, 1] < (shape[1] + shape[3])
        filter_y = np.logical_and(filter_y1, filter_y2)
        filter_bbox = np.logical_and(filter_x, filter_y)
        new_mask = mask * filter_bbox
        new_landmarks = (landmarks.T * new_mask).T
        new_landmarks = new_landmarks.astype(int).astype(float)
        return new_mask, new_landmarks

    def _image_affine_trans(self, image, affine_transf, new_size=None):
        if not new_size:
            new_size = image.size
        # PIL's Image.transform expects the inverse (output -> input) mapping
        inv_affine_transf = dlu.get_inverse_transf(affine_transf)
        new_image = image.transform(new_size, Image.AFFINE, inv_affine_transf.flatten())
        return new_image

    def _bbox_affine_trans(self, bbox, affine_transf):
        # Transform the four corners and take their axis-aligned bounding box
        x, y, w, h = bbox
        images_bb = []
        for point in ([x, y, 1], [x + w, y, 1],
                      [x, y + h, 1], [x + w, y + h, 1]):
            images_bb.append(affine_transf.dot(point))
        images_bb = np.array(images_bb)
        new_corner0 = np.min(images_bb, axis=0)
        new_corner1 = np.max(images_bb, axis=0)
        new_x, new_y = new_corner0
        new_w, new_h = new_corner1 - new_corner0
        new_bbox = np.array((new_x, new_y, new_w, new_h))
        return new_bbox

    def _landmarks_affine_trans(self, landmarks, affine_transf):
        homog_landmarks = dlu.affine2homogeneous(landmarks)
        new_landmarks = affine_transf.dot(homog_landmarks.T).T
        return new_landmarks
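
# Sketch of the assumed semantics of the dlu helpers used above; the real
# implementations live in spiga.data.loaders.augmentors.utils, so this is
# only an illustration, not the library code.
def _example_affine_helpers():
    points = np.array([[1., 2.], [3., 4.]])
    # affine2homogeneous is assumed to append a ones column: (N, 2) -> (N, 3)
    homog = np.hstack([points, np.ones((len(points), 1))])
    affine = np.array([[2., 0., 5.],   # scale x by 2, then shift x by 5
                       [0., 2., 7.]])  # scale y by 2, then shift y by 7
    warped = affine.dot(homog.T).T
    # get_inverse_transf is assumed to invert a 2x3 affine by extending it to
    # 3x3; _image_affine_trans needs this because PIL maps output -> input
    full = np.vstack([affine, [0., 0., 1.]])
    inverse = np.linalg.inv(full)[:2, :]
    return warped, inverse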

class RSTAug(GeometryBaseAug):
    def __init__(self, angle_range=45., scale_min=-0.15, scale_max=0.15, trl_ratio=0.05):
        self.scale_max = scale_max
        self.scale_min = scale_min
        self.angle_range = angle_range
        self.trl_ratio = trl_ratio

    def __call__(self, sample):
        x, y, w, h = sample['bbox']
        x0, y0 = x + w/2, y + h/2  # center of the face, which will be the center of the rotation

        # Bbox translation
        rnd_Tx = np.random.uniform(-self.trl_ratio, self.trl_ratio) * w
        rnd_Ty = np.random.uniform(-self.trl_ratio, self.trl_ratio) * h
        sample['bbox'][0] += rnd_Tx
        sample['bbox'][1] += rnd_Ty

        scale = 1 + np.random.uniform(self.scale_min, self.scale_max)
        angle = np.random.uniform(-self.angle_range, self.angle_range)
        similarity = dlu.get_similarity_matrix(angle, scale, center=(x0, y0))
        new_sample = self.map_affine_transformation(sample, similarity)
        return new_sample
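
# Usage sketch for RSTAug (not part of the original module): a random
# rotation-scale-translation around the face center. The bbox and landmark
# values below are arbitrary.
def _example_rst():
    sample = {'image': Image.new('RGB', (200, 200)),
              'bbox': np.array([50., 50., 100., 100.]),
              'landmarks': np.array([[80., 80.], [120., 80.], [100., 120.]]),
              'mask_ldm': np.ones(3)}
    aug = RSTAug(angle_range=30., scale_min=-0.1, scale_max=0.1, trl_ratio=0.05)
    return aug(sample)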

class TargetCropAug(GeometryBaseAug):
    def __init__(self, img_new_size=128, map_new_size=128, target_dist=1.3):
        self.target_dist = target_dist
        self.new_size_x, self.new_size_y = self._convert_shapes(img_new_size)
        self.map_size_x, self.map_size_y = self._convert_shapes(map_new_size)
        self.img2map_scale = False

        # Mismatch between img shape and featuremap shape
        if self.map_size_x != self.new_size_x or self.map_size_y != self.new_size_y:
            self.img2map_scale = True
            self.map_scale_x = self.map_size_x / self.new_size_x
            self.map_scale_y = self.map_size_y / self.new_size_y
            self.map_scale_xx = self.map_scale_x * self.map_scale_x
            self.map_scale_xy = self.map_scale_x * self.map_scale_y
            self.map_scale_yy = self.map_scale_y * self.map_scale_y

    def _convert_shapes(self, new_size):
        if isinstance(new_size, (tuple, list)):
            new_size_x = new_size[0]
            new_size_y = new_size[1]
        else:
            new_size_x = new_size
            new_size_y = new_size
        return new_size_x, new_size_y
    def __call__(self, sample):
        x, y, w, h = sample['bbox']

        # We enlarge the area taken around the bounding box, so the top-left
        # corner has to be shifted accordingly. Note this will NOT be the new
        # bounding box! We return square images, which is necessary since
        # all the images must have the same size in order to form batches.
        side = max(w, h) * self.target_dist
        x -= (side - w) / 2
        y -= (side - h) / 2

        # Center of the enlarged bounding box
        x0, y0 = x + side/2, y + side/2

        # Homothety factors, chosen so the new dimensions coincide with
        # (new_size_x, new_size_y)
        mu_x = self.new_size_x / side
        mu_y = self.new_size_y / side
        new_w = self.new_size_x
        new_h = self.new_size_y
        new_x0, new_y0 = new_w / 2, new_h / 2

        # Dilatation + translation
        affine_transf = np.array([[mu_x, 0, new_x0 - mu_x * x0],
                                  [0, mu_y, new_y0 - mu_y * y0]])
        sample = self.map_affine_transformation(sample, affine_transf, (new_w, new_h))

        if 'landmarks' in sample.keys():
            img_shape = np.array([0, 0, self.new_size_x, self.new_size_y])
            sample['landmarks_float'] = sample['landmarks']
            sample['mask_ldm_float'] = sample['mask_ldm']
            sample['landmarks'] = np.round(sample['landmarks'])
            sample['mask_ldm'], sample['landmarks'] = self.clean_outbbox_landmarks(img_shape, sample['landmarks'],
                                                                                   sample['mask_ldm'])
        if self.img2map_scale:
            sample = self._rescale_map(sample)
        return sample
    def _rescale_map(self, sample):
        # Rescale
        lnd_float = sample['landmarks_float']
        lnd_float[:, 0] = self.map_scale_x * lnd_float[:, 0]
        lnd_float[:, 1] = self.map_scale_y * lnd_float[:, 1]

        # Clamp landmarks that fall outside the feature map, clamping only
        # the offending coordinate of each row
        lnd = np.round(lnd_float)
        filter_x = lnd[:, 0] >= self.map_size_x
        filter_y = lnd[:, 1] >= self.map_size_y
        lnd[filter_x, 0] = self.map_size_x - 1
        lnd[filter_y, 1] = self.map_size_y - 1
        new_lnd = (lnd.T * sample['mask_ldm']).T
        new_lnd = new_lnd.astype(int).astype(float)

        sample['landmarks_float'] = lnd_float
        sample['landmarks'] = new_lnd
        sample['img2map_scale'] = [self.map_scale_x, self.map_scale_y]
        return sample
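
# Usage sketch for TargetCropAug (not part of the original module): crop to a
# 256x256 network input while producing 64x64 feature-map coordinates, which
# exercises the img2map_scale branch above.
def _example_target_crop():
    sample = {'image': Image.new('RGB', (640, 480)),
              'bbox': np.array([200., 100., 120., 150.]),
              'landmarks': np.array([[230., 140.], [290., 140.], [260., 200.]]),
              'mask_ldm': np.ones(3)}
    aug = TargetCropAug(img_new_size=256, map_new_size=64, target_dist=1.3)
    return aug(sample)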

class OcclusionAug:
    def __init__(self, min_length=0.1, max_length=0.4, num_maps=1):
        self.min_length = min_length
        self.max_length = max_length
        self.num_maps = num_maps

    def __call__(self, sample):
        x, y, w, h = sample['bbox']
        image = sample['image']
        landmarks = sample['landmarks']
        vis = sample['visible']

        min_ratio = self.min_length
        max_ratio = self.max_length
        rnd_width = np.random.randint(int(w * min_ratio), int(w * max_ratio))
        rnd_height = np.random.randint(int(h * min_ratio), int(h * max_ratio))

        # (xi, yi) and (xf, yf) are the top-left and bottom-right corners of
        # the occlusion rectangle in image coordinates
        xi = int(x + np.random.randint(0, w - rnd_width))
        xf = int(xi + rnd_width)
        yi = int(y + np.random.randint(0, h - rnd_height))
        yf = int(yi + rnd_height)

        # Fill the rectangle with a single random RGB color
        pixels = np.array(image)
        pixels[yi:yf, xi:xf, :] = np.random.uniform(0, 255, size=3)
        image = Image.fromarray(pixels)
        sample['image'] = image

        # Update visibilities: landmarks inside the rectangle become occluded
        filter_x1 = landmarks[:, 0] >= xi
        filter_x2 = landmarks[:, 0] < xf
        filter_x = np.logical_and(filter_x1, filter_x2)
        filter_y1 = landmarks[:, 1] >= yi
        filter_y2 = landmarks[:, 1] < yf
        filter_y = np.logical_and(filter_y1, filter_y2)
        filter_novis = np.logical_and(filter_x, filter_y)
        filter_vis = np.logical_not(filter_novis)
        sample['visible'] = vis * filter_vis
        return sample
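
# Usage sketch for OcclusionAug (not part of the original module): landmarks
# that fall inside the random rectangle come back with visible == 0.
def _example_occlusion():
    sample = {'image': Image.new('RGB', (200, 200), color=(128, 128, 128)),
              'bbox': np.array([40., 40., 120., 120.]),
              'landmarks': np.array([[60., 60.], [140., 60.], [100., 140.]]),
              'visible': np.ones(3)}
    return OcclusionAug(min_length=0.1, max_length=0.4)(sample)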

class LightingAug:
    def __init__(self, hsv_range_min=(-0.5, -0.5, -0.5), hsv_range_max=(0.5, 0.5, 0.5)):
        self.hsv_range_min = hsv_range_min
        self.hsv_range_max = hsv_range_max

    def __call__(self, sample):
        # Convert from RGB to HSV colorspace
        image = np.array(sample['image'])
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

        # Generate random multiplicative gains per channel
        H = 1 + np.random.uniform(self.hsv_range_min[0], self.hsv_range_max[0])
        S = 1 + np.random.uniform(self.hsv_range_min[1], self.hsv_range_max[1])
        V = 1 + np.random.uniform(self.hsv_range_min[2], self.hsv_range_max[2])

        # OpenCV stores 8-bit hue in [0, 179]; saturation and value in [0, 255]
        hsv[:, :, 0] = np.clip(H*hsv[:, :, 0], 0, 179)
        hsv[:, :, 1] = np.clip(S*hsv[:, :, 1], 0, 255)
        hsv[:, :, 2] = np.clip(V*hsv[:, :, 2], 0, 255)

        # Convert back to RGB colorspace
        image = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
        sample['image'] = Image.fromarray(image)
        return sample
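
# Usage sketch for LightingAug (not part of the original module): with the
# default ranges, each HSV channel is scaled by a factor in [0.5, 1.5].
def _example_lighting():
    sample = {'image': Image.new('RGB', (64, 64), color=(200, 120, 80))}
    return LightingAug()(sample)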

class BlurAug:
    def __init__(self, blur_prob=0.5, blur_kernel_range=(0, 2)):
        self.blur_prob = blur_prob
        self.kernel_range = blur_kernel_range

    def __call__(self, sample):
        # Smooth image
        image = np.array(sample['image'])
        if np.random.uniform(0.0, 1.0) < self.blur_prob:
            # np.random.random_integers was removed from NumPy; randint excludes
            # the high end, hence the +1. The kernel size is kept odd, as
            # required by cv2.GaussianBlur.
            kernel = np.random.randint(self.kernel_range[0], self.kernel_range[1] + 1) * 2 + 1
            image = cv2.GaussianBlur(image, (kernel, kernel), 0, 0)
            sample['image'] = Image.fromarray(image)
        return sample
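
# Composed pipeline sketch (not part of the original module). Each augmenter
# maps sample -> sample, so they chain with torchvision's Compose; the order
# below (flip and jitter first, target crop last) is an assumption, not the
# repository's training configuration, and the flip order is hypothetical.
def _example_pipeline():
    pipeline = transforms.Compose([
        HorizontalFlipAug(ldm_flip_order=[1, 0, 3, 2], prob=0.5),
        RSTAug(),
        OcclusionAug(),
        LightingAug(),
        BlurAug(),
        TargetCropAug(img_new_size=256, map_new_size=64),
    ])
    sample = {'image': Image.new('RGB', (640, 480)),
              'bbox': np.array([200., 100., 150., 150.]),
              'landmarks': np.random.uniform(220., 320., size=(4, 2)),
              'mask_ldm': np.ones(4),
              'visible': np.ones(4)}
    return pipeline(sample)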