import random

import cv2
import numpy as np
from PIL import Image
from torchvision import transforms

# My libs
import spiga.data.loaders.augmentors.utils as dlu

class HorizontalFlipAug:
    def __init__(self, ldm_flip_order, prob=0.5):
        self.prob = prob
        self.ldm_flip_order = ldm_flip_order

    def __call__(self, sample):
        img = sample['image']
        landmarks = sample['landmarks']
        mask = sample['mask_ldm']
        vis = sample['visible']
        bbox = sample['bbox']

        if random.random() < self.prob:
            new_img = transforms.functional.hflip(img)
            # Swap left/right landmark indices, then mirror x: x -> width - x
            lm_new_order = self.ldm_flip_order
            new_landmarks = landmarks[lm_new_order]
            new_landmarks = (new_landmarks - (img.size[0], 0)) * (-1, 1)
            new_mask = mask[lm_new_order]
            new_vis = vis[lm_new_order]

            x, y, w, h = bbox
            new_x = img.size[0] - x - w
            new_bbox = np.array((new_x, y, w, h))

            sample['image'] = new_img
            sample['landmarks'] = new_landmarks
            sample['mask_ldm'] = new_mask
            sample['visible'] = new_vis
            sample['bbox'] = new_bbox
        return sample
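
# Usage sketch for HorizontalFlipAug (not part of the original module). The
# 4-landmark layout below, with (0, 1) and (2, 3) as left/right pairs, is
# hypothetical; real datasets supply their own ldm_flip_order tables.
def _example_hflip():
    sample = {'image': Image.new('RGB', (100, 100)),
              'landmarks': np.array([[10., 20.], [90., 20.], [30., 60.], [70., 60.]]),
              'mask_ldm': np.ones(4),
              'visible': np.ones(4),
              'bbox': np.array([10., 10., 80., 80.])}
    aug = HorizontalFlipAug(ldm_flip_order=[1, 0, 3, 2], prob=1.0)  # prob=1 forces the flip
    return aug(sample)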

class GeometryBaseAug:
    def __call__(self, sample):
        raise NotImplementedError('Inheritance __call__ not defined')

    def map_affine_transformation(self, sample, affine_transf, new_size=None):
        sample['image'] = self._image_affine_trans(sample['image'], affine_transf, new_size)
        sample['bbox'] = self._bbox_affine_trans(sample['bbox'], affine_transf)
        if 'landmarks' in sample.keys():
            sample['landmarks'] = self._landmarks_affine_trans(sample['landmarks'], affine_transf)
        return sample

    def clean_outbbox_landmarks(self, shape, landmarks, mask):
        # shape is (x, y, w, h); landmarks outside it are masked out and zeroed
        filter_x1 = landmarks[:, 0] >= shape[0]
        filter_x2 = landmarks[:, 0] < (shape[0] + shape[2])
        filter_x = np.logical_and(filter_x1, filter_x2)
        filter_y1 = landmarks[:, 1] >= shape[1]
        filter_y2 = landmarks[:, 1] < (shape[1] + shape[3])
        filter_y = np.logical_and(filter_y1, filter_y2)
        filter_bbox = np.logical_and(filter_x, filter_y)
        new_mask = mask * filter_bbox
        new_landmarks = (landmarks.T * new_mask).T
        new_landmarks = new_landmarks.astype(int).astype(float)
        return new_mask, new_landmarks

    def _image_affine_trans(self, image, affine_transf, new_size=None):
        if not new_size:
            new_size = image.size
        # PIL's Image.transform expects the inverse (output -> input) mapping
        inv_affine_transf = dlu.get_inverse_transf(affine_transf)
        new_image = image.transform(new_size, Image.AFFINE, inv_affine_transf.flatten())
        return new_image

    def _bbox_affine_trans(self, bbox, affine_transf):
        # Transform the four corners and take their axis-aligned bounding box
        x, y, w, h = bbox
        images_bb = []
        for point in ([x, y, 1], [x + w, y, 1],
                      [x, y + h, 1], [x + w, y + h, 1]):
            images_bb.append(affine_transf.dot(point))
        images_bb = np.array(images_bb)
        new_corner0 = np.min(images_bb, axis=0)
        new_corner1 = np.max(images_bb, axis=0)
        new_x, new_y = new_corner0
        new_w, new_h = new_corner1 - new_corner0
        new_bbox = np.array((new_x, new_y, new_w, new_h))
        return new_bbox

    def _landmarks_affine_trans(self, landmarks, affine_transf):
        homog_landmarks = dlu.affine2homogeneous(landmarks)
        new_landmarks = affine_transf.dot(homog_landmarks.T).T
        return new_landmarks
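
# Sketch of the assumed semantics of the dlu helpers used above; the real
# implementations live in spiga.data.loaders.augmentors.utils, so this is
# only an illustration, not the library code.
def _example_affine_helpers():
    points = np.array([[1., 2.], [3., 4.]])
    # affine2homogeneous is assumed to append a ones column: (N, 2) -> (N, 3)
    homog = np.hstack([points, np.ones((len(points), 1))])
    affine = np.array([[2., 0., 5.],   # scale x by 2, then shift x by 5
                       [0., 2., 7.]])  # scale y by 2, then shift y by 7
    warped = affine.dot(homog.T).T
    # get_inverse_transf is assumed to invert a 2x3 affine by extending it to
    # 3x3; _image_affine_trans needs this because PIL maps output -> input
    full = np.vstack([affine, [0., 0., 1.]])
    inverse = np.linalg.inv(full)[:2, :]
    return warped, inverse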

class RSTAug(GeometryBaseAug):
    def __init__(self, angle_range=45., scale_min=-0.15, scale_max=0.15, trl_ratio=0.05):
        self.scale_max = scale_max
        self.scale_min = scale_min
        self.angle_range = angle_range
        self.trl_ratio = trl_ratio

    def __call__(self, sample):
        x, y, w, h = sample['bbox']
        x0, y0 = x + w/2, y + h/2  # center of the face, which will be the center of the rotation

        # Bbox translation
        rnd_Tx = np.random.uniform(-self.trl_ratio, self.trl_ratio) * w
        rnd_Ty = np.random.uniform(-self.trl_ratio, self.trl_ratio) * h
        sample['bbox'][0] += rnd_Tx
        sample['bbox'][1] += rnd_Ty

        scale = 1 + np.random.uniform(self.scale_min, self.scale_max)
        angle = np.random.uniform(-self.angle_range, self.angle_range)
        similarity = dlu.get_similarity_matrix(angle, scale, center=(x0, y0))
        new_sample = self.map_affine_transformation(sample, similarity)
        return new_sample
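
# Usage sketch for RSTAug (not part of the original module): a random
# rotation-scale-translation around the face center. The bbox and landmark
# values below are arbitrary.
def _example_rst():
    sample = {'image': Image.new('RGB', (200, 200)),
              'bbox': np.array([50., 50., 100., 100.]),
              'landmarks': np.array([[80., 80.], [120., 80.], [100., 120.]]),
              'mask_ldm': np.ones(3)}
    aug = RSTAug(angle_range=30., scale_min=-0.1, scale_max=0.1, trl_ratio=0.05)
    return aug(sample)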

class TargetCropAug(GeometryBaseAug):
    def __init__(self, img_new_size=128, map_new_size=128, target_dist=1.3):
        self.target_dist = target_dist
        self.new_size_x, self.new_size_y = self._convert_shapes(img_new_size)
        self.map_size_x, self.map_size_y = self._convert_shapes(map_new_size)
        self.img2map_scale = False

        # Mismatch between img shape and featuremap shape
        if self.map_size_x != self.new_size_x or self.map_size_y != self.new_size_y:
            self.img2map_scale = True
            self.map_scale_x = self.map_size_x / self.new_size_x
            self.map_scale_y = self.map_size_y / self.new_size_y
            self.map_scale_xx = self.map_scale_x * self.map_scale_x
            self.map_scale_xy = self.map_scale_x * self.map_scale_y
            self.map_scale_yy = self.map_scale_y * self.map_scale_y

    def _convert_shapes(self, new_size):
        if isinstance(new_size, (tuple, list)):
            new_size_x = new_size[0]
            new_size_y = new_size[1]
        else:
            new_size_x = new_size
            new_size_y = new_size
        return new_size_x, new_size_y
    def __call__(self, sample):
        x, y, w, h = sample['bbox']

        # We enlarge the area taken around the bounding box, so the top-left
        # corner has to be shifted accordingly. Note this will NOT be the new
        # bounding box! We return square images, which is necessary since
        # all the images must have the same size in order to form batches.
        side = max(w, h) * self.target_dist
        x -= (side - w) / 2
        y -= (side - h) / 2

        # Center of the enlarged bounding box
        x0, y0 = x + side/2, y + side/2

        # Homothety factors, chosen so the new dimensions coincide with
        # (new_size_x, new_size_y)
        mu_x = self.new_size_x / side
        mu_y = self.new_size_y / side
        new_w = self.new_size_x
        new_h = self.new_size_y
        new_x0, new_y0 = new_w / 2, new_h / 2

        # Dilatation + translation
        affine_transf = np.array([[mu_x, 0, new_x0 - mu_x * x0],
                                  [0, mu_y, new_y0 - mu_y * y0]])
        sample = self.map_affine_transformation(sample, affine_transf, (new_w, new_h))

        if 'landmarks' in sample.keys():
            img_shape = np.array([0, 0, self.new_size_x, self.new_size_y])
            sample['landmarks_float'] = sample['landmarks']
            sample['mask_ldm_float'] = sample['mask_ldm']
            sample['landmarks'] = np.round(sample['landmarks'])
            sample['mask_ldm'], sample['landmarks'] = self.clean_outbbox_landmarks(img_shape, sample['landmarks'],
                                                                                   sample['mask_ldm'])
        if self.img2map_scale:
            sample = self._rescale_map(sample)
        return sample
    def _rescale_map(self, sample):
        # Rescale
        lnd_float = sample['landmarks_float']
        lnd_float[:, 0] = self.map_scale_x * lnd_float[:, 0]
        lnd_float[:, 1] = self.map_scale_y * lnd_float[:, 1]

        # Clamp landmarks that fall outside the feature map, clamping only
        # the offending coordinate of each row
        lnd = np.round(lnd_float)
        filter_x = lnd[:, 0] >= self.map_size_x
        filter_y = lnd[:, 1] >= self.map_size_y
        lnd[filter_x, 0] = self.map_size_x - 1
        lnd[filter_y, 1] = self.map_size_y - 1
        new_lnd = (lnd.T * sample['mask_ldm']).T
        new_lnd = new_lnd.astype(int).astype(float)

        sample['landmarks_float'] = lnd_float
        sample['landmarks'] = new_lnd
        sample['img2map_scale'] = [self.map_scale_x, self.map_scale_y]
        return sample
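
# Usage sketch for TargetCropAug (not part of the original module): crop to a
# 256x256 network input while producing 64x64 feature-map coordinates, which
# exercises the img2map_scale branch above.
def _example_target_crop():
    sample = {'image': Image.new('RGB', (640, 480)),
              'bbox': np.array([200., 100., 120., 150.]),
              'landmarks': np.array([[230., 140.], [290., 140.], [260., 200.]]),
              'mask_ldm': np.ones(3)}
    aug = TargetCropAug(img_new_size=256, map_new_size=64, target_dist=1.3)
    return aug(sample)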

class OcclusionAug:
    def __init__(self, min_length=0.1, max_length=0.4, num_maps=1):
        self.min_length = min_length
        self.max_length = max_length
        self.num_maps = num_maps

    def __call__(self, sample):
        x, y, w, h = sample['bbox']
        image = sample['image']
        landmarks = sample['landmarks']
        vis = sample['visible']

        min_ratio = self.min_length
        max_ratio = self.max_length
        rnd_width = np.random.randint(int(w * min_ratio), int(w * max_ratio))
        rnd_height = np.random.randint(int(h * min_ratio), int(h * max_ratio))

        # (xi, yi) and (xf, yf) are the top-left and bottom-right corners of
        # the occlusion rectangle in image coordinates
        xi = int(x + np.random.randint(0, w - rnd_width))
        xf = int(xi + rnd_width)
        yi = int(y + np.random.randint(0, h - rnd_height))
        yf = int(yi + rnd_height)

        # Fill the rectangle with a single random RGB color
        pixels = np.array(image)
        pixels[yi:yf, xi:xf, :] = np.random.uniform(0, 255, size=3)
        image = Image.fromarray(pixels)
        sample['image'] = image

        # Update visibilities: landmarks inside the rectangle become occluded
        filter_x1 = landmarks[:, 0] >= xi
        filter_x2 = landmarks[:, 0] < xf
        filter_x = np.logical_and(filter_x1, filter_x2)
        filter_y1 = landmarks[:, 1] >= yi
        filter_y2 = landmarks[:, 1] < yf
        filter_y = np.logical_and(filter_y1, filter_y2)
        filter_novis = np.logical_and(filter_x, filter_y)
        filter_vis = np.logical_not(filter_novis)
        sample['visible'] = vis * filter_vis
        return sample
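
# Usage sketch for OcclusionAug (not part of the original module): landmarks
# that fall inside the random rectangle come back with visible == 0.
def _example_occlusion():
    sample = {'image': Image.new('RGB', (200, 200), color=(128, 128, 128)),
              'bbox': np.array([40., 40., 120., 120.]),
              'landmarks': np.array([[60., 60.], [140., 60.], [100., 140.]]),
              'visible': np.ones(3)}
    return OcclusionAug(min_length=0.1, max_length=0.4)(sample)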

class LightingAug:
    def __init__(self, hsv_range_min=(-0.5, -0.5, -0.5), hsv_range_max=(0.5, 0.5, 0.5)):
        self.hsv_range_min = hsv_range_min
        self.hsv_range_max = hsv_range_max

    def __call__(self, sample):
        # Convert from RGB to HSV colorspace
        image = np.array(sample['image'])
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

        # Generate random multiplicative gains per channel
        H = 1 + np.random.uniform(self.hsv_range_min[0], self.hsv_range_max[0])
        S = 1 + np.random.uniform(self.hsv_range_min[1], self.hsv_range_max[1])
        V = 1 + np.random.uniform(self.hsv_range_min[2], self.hsv_range_max[2])

        # OpenCV stores 8-bit hue in [0, 179]; saturation and value in [0, 255]
        hsv[:, :, 0] = np.clip(H*hsv[:, :, 0], 0, 179)
        hsv[:, :, 1] = np.clip(S*hsv[:, :, 1], 0, 255)
        hsv[:, :, 2] = np.clip(V*hsv[:, :, 2], 0, 255)

        # Convert back to RGB colorspace
        image = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
        sample['image'] = Image.fromarray(image)
        return sample
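
# Usage sketch for LightingAug (not part of the original module): with the
# default ranges, each HSV channel is scaled by a factor in [0.5, 1.5].
def _example_lighting():
    sample = {'image': Image.new('RGB', (64, 64), color=(200, 120, 80))}
    return LightingAug()(sample)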

class BlurAug:
    def __init__(self, blur_prob=0.5, blur_kernel_range=(0, 2)):
        self.blur_prob = blur_prob
        self.kernel_range = blur_kernel_range

    def __call__(self, sample):
        # Smooth image
        image = np.array(sample['image'])
        if np.random.uniform(0.0, 1.0) < self.blur_prob:
            # np.random.random_integers was removed from NumPy; randint excludes
            # the high end, hence the +1. The kernel size is kept odd, as
            # required by cv2.GaussianBlur.
            kernel = np.random.randint(self.kernel_range[0], self.kernel_range[1] + 1) * 2 + 1
            image = cv2.GaussianBlur(image, (kernel, kernel), 0, 0)
            sample['image'] = Image.fromarray(image)
        return sample
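
# Composed pipeline sketch (not part of the original module). Each augmenter
# maps sample -> sample, so they chain with torchvision's Compose; the order
# below (flip and jitter first, target crop last) is an assumption, not the
# repository's training configuration, and the flip order is hypothetical.
def _example_pipeline():
    pipeline = transforms.Compose([
        HorizontalFlipAug(ldm_flip_order=[1, 0, 3, 2], prob=0.5),
        RSTAug(),
        OcclusionAug(),
        LightingAug(),
        BlurAug(),
        TargetCropAug(img_new_size=256, map_new_size=64),
    ])
    sample = {'image': Image.new('RGB', (640, 480)),
              'bbox': np.array([200., 100., 150., 150.]),
              'landmarks': np.random.uniform(220., 320., size=(4, 2)),
              'mask_ldm': np.ones(4),
              'visible': np.ones(4)}
    return pipeline(sample)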