Spaces:

svjack
/

ControlNet-Face-Chinese

Running

File size: 11,374 Bytes

d015578

import random
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms

# My libs
import spiga.data.loaders.augmentors.utils as dlu


class HorizontalFlipAug:
    """Randomly mirror a sample left-right.

    Args:
        ldm_flip_order: Index sequence used to reorder the per-landmark
            arrays after mirroring (presumably maps every landmark to its
            mirrored counterpart -- confirm against the dataset definition).
        prob: Probability of applying the flip on each call.
    """

    def __init__(self, ldm_flip_order, prob=0.5):
        self.ldm_flip_order = ldm_flip_order
        self.prob = prob

    def __call__(self, sample):
        """Flip ``sample`` in place with probability ``self.prob``.

        Reads the ``'image'``, ``'landmarks'``, ``'mask_ldm'``, ``'visible'``
        and ``'bbox'`` entries and, when the flip is drawn, overwrites all
        five with their mirrored versions. The same dict is returned.
        """
        # Every entry is fetched up front, so a missing key raises even when
        # no flip ends up being applied.
        image = sample['image']
        points = sample['landmarks']
        ldm_mask = sample['mask_ldm']
        visible = sample['visible']
        box = sample['bbox']

        if random.random() < self.prob:
            order = self.ldm_flip_order
            mirrored_image = transforms.functional.hflip(image)

            # image.size[0] is used as the image width
            # (assumes a PIL-like ``size`` attribute -- TODO confirm).
            width = image.size[0]
            # x -> width - x, y unchanged.
            mirrored_points = (points[order] - (width, 0)) * (-1, 1)
            mirrored_mask = ldm_mask[order]
            mirrored_visible = visible[order]

            left, top, box_w, box_h = box
            mirrored_left = width - left - box_w
            mirrored_box = np.array((mirrored_left, top, box_w, box_h))

            sample.update({
                'image': mirrored_image,
                'landmarks': mirrored_points,
                'mask_ldm': mirrored_mask,
                'visible': mirrored_visible,
                'bbox': mirrored_box,
            })

        return sample


class GeometryBaseAug:
    """Shared helpers for augmentors that apply an affine map to a sample.

    Subclasses must implement ``__call__``; this base class only provides
    the routines that push an affine transformation through the image, the
    bounding box and (when present) the landmarks.
    """

    def __call__(self, sample):
        raise NotImplementedError('Inheritance __call__ not defined')

    def map_affine_transformation(self, sample, affine_transf, new_size=None):
        """Apply ``affine_transf`` to the image, bbox and landmarks of ``sample``.

        The dict is updated in place and returned. ``'landmarks'`` is only
        transformed when that key exists; ``'image'`` and ``'bbox'`` are
        required.
        """
        sample['image'] = self._image_affine_trans(sample['image'], affine_transf, new_size)
        sample['bbox'] = self._bbox_affine_trans(sample['bbox'], affine_transf)
        if 'landmarks' in sample:
            sample['landmarks'] = self._landmarks_affine_trans(sample['landmarks'], affine_transf)
        return sample

    def clean_outbbox_landmarks(self, shape, landmarks, mask):
        """Zero out the landmarks that fall outside a rectangle.

        Args:
            shape: ``(x, y, w, h)`` rectangle; a point is inside when
                ``x <= px < x + w`` and ``y <= py < y + h``.
            landmarks: Array indexed as ``[:, 0]`` (x) and ``[:, 1]`` (y).
            mask: Per-landmark mask multiplied by the inside test.

        Returns:
            ``(new_mask, new_landmarks)`` where masked-out landmarks are set
            to zero and the remaining coordinates are truncated to integer
            values (stored as floats).
        """
        left, top = shape[0], shape[1]
        right, bottom = left + shape[2], top + shape[3]

        xs, ys = landmarks[:, 0], landmarks[:, 1]
        inside_x = (xs >= left) & (xs < right)
        inside_y = (ys >= top) & (ys < bottom)
        inside = inside_x & inside_y

        new_mask = mask * inside
        masked = (landmarks.T * new_mask).T
        # int -> float round trip drops the fractional part.
        truncated = masked.astype(int).astype(float)
        return new_mask, truncated

    def _image_affine_trans(self, image, affine_transf, new_size=None):
        """Warp ``image`` with ``affine_transf``; the output size defaults to the input size."""
        out_size = new_size if new_size else image.size
        # The inverse matrix is what gets handed to ``Image.transform``.
        inverse = dlu.get_inverse_transf(affine_transf)
        return image.transform(out_size, Image.AFFINE, inverse.flatten())

    def _bbox_affine_trans(self, bbox, affine_transf):
        """Return the axis-aligned ``(x, y, w, h)`` box enclosing the transformed bbox corners."""
        left, top, width, height = bbox
        corners = (
            [left, top, 1],
            [left + width, top, 1],
            [left, top + height, 1],
            [left + width, top + height, 1],
        )
        mapped = np.array([affine_transf.dot(corner) for corner in corners])

        lower = mapped.min(axis=0)
        upper = mapped.max(axis=0)

        out_x, out_y = lower
        out_w, out_h = upper - lower
        return np.array((out_x, out_y, out_w, out_h))

    def _landmarks_affine_trans(self, landmarks, affine_transf):
        """Map every landmark through ``affine_transf`` via ``dlu.affine2homogeneous``."""
        homogeneous = dlu.affine2homogeneous(landmarks)
        return affine_transf.dot(homogeneous.T).T


class RSTAug(GeometryBaseAug):
    """Random rotation, scale and translation augmentation.

    Args:
        angle_range: The rotation angle is drawn uniformly from
            ``[-angle_range, angle_range]`` (units are whatever
            ``dlu.get_similarity_matrix`` expects -- presumably degrees).
        scale_min: Lower bound of the uniform offset added to a scale of 1.
        scale_max: Upper bound of the uniform offset added to a scale of 1.
        trl_ratio: Maximum bbox shift, as a fraction of the bbox width
            (x axis) or height (y axis).
    """

    def __init__(self, angle_range=45., scale_min=-0.15, scale_max=0.15, trl_ratio=0.05):
        self.angle_range = angle_range
        self.scale_min = scale_min
        self.scale_max = scale_max
        self.trl_ratio = trl_ratio

    def __call__(self, sample):
        """Shift the bbox randomly, then rotate/scale the whole sample around the face center."""
        bbox = sample['bbox']
        left, top, width, height = bbox

        # Rotation center: middle of the bbox *before* the random shift.
        center = (left + width/2, top + height/2)

        # Random bbox translation, written into the existing bbox container.
        shift_x = np.random.uniform(-self.trl_ratio, self.trl_ratio) * width
        shift_y = np.random.uniform(-self.trl_ratio, self.trl_ratio) * height
        bbox[0] += shift_x
        bbox[1] += shift_y

        # Scale is drawn before the angle; keep this order for reproducibility.
        scale = 1 + np.random.uniform(self.scale_min, self.scale_max)
        angle = np.random.uniform(-self.angle_range, self.angle_range)

        similarity = dlu.get_similarity_matrix(angle, scale, center=center)
        return self.map_affine_transformation(sample, similarity)


class TargetCropAug(GeometryBaseAug):
    """Crop an enlarged square around the bbox and resize it to a fixed size.

    Args:
        img_new_size: Output image size, either a scalar (square) or an
            ``(x, y)`` tuple/list.
        map_new_size: Feature-map size, same format as ``img_new_size``.
            When it differs from the image size the landmarks are
            additionally rescaled to feature-map coordinates.
        target_dist: Enlargement factor applied to the longest bbox side to
            obtain the side of the cropped square.
    """

    def __init__(self, img_new_size=128, map_new_size=128, target_dist=1.3):

        self.target_dist = target_dist
        self.new_size_x, self.new_size_y = self._convert_shapes(img_new_size)
        self.map_size_x, self.map_size_y = self._convert_shapes(map_new_size)
        self.img2map_scale = False

        # Mismatch between img shape and featuremap shape
        if self.map_size_x != self.new_size_x or self.map_size_y != self.new_size_y:
            self.img2map_scale = True
            self.map_scale_x = self.map_size_x / self.new_size_x
            self.map_scale_y = self.map_size_y / self.new_size_y
            self.map_scale_xx = self.map_scale_x * self.map_scale_x
            self.map_scale_xy = self.map_scale_x * self.map_scale_y
            self.map_scale_yy = self.map_scale_y * self.map_scale_y

    def _convert_shapes(self, new_size):
        """Return ``new_size`` as an ``(x, y)`` pair; scalars are used for both axes."""
        if isinstance(new_size, (tuple, list)):
            new_size_x = new_size[0]
            new_size_y = new_size[1]
        else:
            new_size_x = new_size
            new_size_y = new_size
        return new_size_x, new_size_y

    def __call__(self, sample):
        """Crop and resize ``sample`` around its bbox.

        The dict is updated in place and returned. When ``'landmarks'`` is
        present, ``'mask_ldm'`` must be present too, and the following
        entries are written:

        * ``'landmarks_float'`` / ``'mask_ldm_float'``: transformed
          landmarks and mask before rounding and out-of-image filtering.
        * ``'landmarks'`` / ``'mask_ldm'``: rounded landmarks with the ones
          outside the output image zeroed and masked out.
        """
        x, y, w, h = sample['bbox']
        # We enlarge the area taken around the bounding box, so it is
        # necessary to move the corner of the bounding box according to that
        # enlargement. Note this will NOT be the new bounding box!
        # We return fixed-size images, which is necessary since all the
        # images must have the same size in order to form batches.
        side = max(w, h) * self.target_dist
        x -= (side - w) / 2
        y -= (side - h) / 2

        # center of the enlarged bounding box
        x0, y0 = x + side/2, y + side/2
        # homothety factors, chosen so the enlarged square maps onto the
        # (new_size_x, new_size_y) output image
        mu_x = self.new_size_x / side
        mu_y = self.new_size_y / side

        new_w = self.new_size_x
        new_h = self.new_size_y
        new_x0, new_y0 = new_w / 2, new_h / 2

        # dilatation + translation: the center of the enlarged square lands
        # on the center of the output image
        affine_transf = np.array([[mu_x, 0, new_x0 - mu_x * x0],
                                  [0, mu_y, new_y0 - mu_y * y0]])

        sample = self.map_affine_transformation(sample, affine_transf, (new_w, new_h))
        if 'landmarks' in sample.keys():
            img_shape = np.array([0, 0, self.new_size_x, self.new_size_y])
            sample['landmarks_float'] = sample['landmarks']
            sample['mask_ldm_float'] = sample['mask_ldm']
            sample['landmarks'] = np.round(sample['landmarks'])
            sample['mask_ldm'], sample['landmarks'] = self.clean_outbbox_landmarks(img_shape, sample['landmarks'],
                                                                                   sample['mask_ldm'])

            if self.img2map_scale:
                sample = self._rescale_map(sample)
        return sample

    def _rescale_map(self, sample):
        """Convert the landmarks from image to feature-map coordinates.

        ``sample['landmarks_float']`` is scaled in place. The rounded
        landmarks are clamped so that no coordinate exceeds the last valid
        feature-map index, multiplied by ``sample['mask_ldm']`` and stored in
        ``sample['landmarks']``. The applied scale factors are stored in
        ``sample['img2map_scale']``.
        """

        # Rescale
        lnd_float = sample['landmarks_float']
        lnd_float[:, 0] = self.map_scale_x * lnd_float[:, 0]
        lnd_float[:, 1] = self.map_scale_y * lnd_float[:, 1]

        # Filter landmarks: rounding can push a coordinate up to map_size,
        # one past the last valid index. Clamp each axis independently; the
        # column index is required, otherwise the boolean mask would
        # overwrite both x and y of the selected rows.
        lnd = np.round(lnd_float)
        filter_x = lnd[:, 0] >= self.map_size_x
        filter_y = lnd[:, 1] >= self.map_size_y
        lnd[filter_x, 0] = self.map_size_x - 1
        lnd[filter_y, 1] = self.map_size_y - 1
        new_lnd = (lnd.T * sample['mask_ldm']).T
        new_lnd = new_lnd.astype(int).astype(float)

        sample['landmarks_float'] = lnd_float
        sample['landmarks'] = new_lnd
        sample['img2map_scale'] = [self.map_scale_x, self.map_scale_y]
        return sample



class OcclusionAug:
    """Paint a random single-color rectangle inside the bbox and hide the landmarks it covers.

    Args:
        min_length: Lower bound of the rectangle side, as a fraction of the
            bbox width/height.
        max_length: Upper bound (exclusive after integer truncation) of the
            rectangle side, as a fraction of the bbox width/height.
        num_maps: Stored on the instance; not used by ``__call__``.
    """

    def __init__(self, min_length=0.1, max_length=0.4, num_maps=1):
        self.min_length = min_length
        self.max_length = max_length
        self.num_maps = num_maps

    def __call__(self, sample):
        """Occlude part of ``sample['image']`` and update ``sample['visible']``.

        The dict is modified in place and returned.
        """
        box_x, box_y, box_w, box_h = sample['bbox']
        source_image = sample['image']
        points = sample['landmarks']
        visible = sample['visible']

        lo_ratio, hi_ratio = self.min_length, self.max_length

        # Draw order matters for reproducibility: width, height, x offset,
        # y offset and finally the fill color.
        occ_w = np.random.randint(int(box_w * lo_ratio), int(box_w * hi_ratio))
        occ_h = np.random.randint(int(box_h * lo_ratio), int(box_h * hi_ratio))

        # [x_start, x_end) x [y_start, y_end) is the occluded pixel range.
        x_start = int(box_x + np.random.randint(0, box_w - occ_w))
        x_end = int(x_start + occ_w)
        y_start = int(box_y + np.random.randint(0, box_h - occ_h))
        y_end = int(y_start + occ_h)

        # One random value per channel, broadcast over the whole rectangle.
        canvas = np.array(source_image)
        canvas[y_start:y_end, x_start:x_end, :] = np.random.uniform(0, 255, size=3)
        sample['image'] = Image.fromarray(canvas)

        # Landmarks lying inside the rectangle are no longer visible.
        xs, ys = points[:, 0], points[:, 1]
        covered_x = (xs >= x_start) & (xs < x_end)
        covered_y = (ys >= y_start) & (ys < y_end)
        still_visible = ~(covered_x & covered_y)
        sample['visible'] = visible * still_visible
        return sample


class LightingAug:
    """Randomly rescale the hue, saturation and value channels of the image.

    Args:
        hsv_range_min: Per-channel (H, S, V) lower bounds of the uniform
            offset added to a gain of 1.
        hsv_range_max: Per-channel (H, S, V) upper bounds of the uniform
            offset added to a gain of 1.
    """

    def __init__(self, hsv_range_min=(-0.5, -0.5, -0.5), hsv_range_max=(0.5, 0.5, 0.5)):
        self.hsv_range_min = hsv_range_min
        self.hsv_range_max = hsv_range_max

    def __call__(self, sample):
        """Replace ``sample['image']`` with a color-jittered copy and return ``sample``."""
        # RGB -> HSV
        rgb = np.array(sample['image'])
        hsv = cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV)

        # One multiplicative gain per channel, drawn in H, S, V order.
        gains = [
            1 + np.random.uniform(self.hsv_range_min[channel], self.hsv_range_max[channel])
            for channel in range(3)
        ]

        # Scaled channels are clipped to [0, 179] for H and [0, 255] for S and V.
        upper_bounds = (179, 255, 255)
        for channel, (gain, upper) in enumerate(zip(gains, upper_bounds)):
            hsv[:, :, channel] = np.clip(gain * hsv[:, :, channel], 0, upper)

        # HSV -> RGB
        jittered = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
        sample['image'] = Image.fromarray(jittered)

        return sample


class BlurAug:
    """Randomly apply a Gaussian blur to the sample image.

    Args:
        blur_prob: Probability of blurring on each call.
        blur_kernel_range: Inclusive ``(low, high)`` range of the integer
            ``k`` that is drawn; the square kernel side is ``2 * k + 1``,
            so it is always odd.
    """

    def __init__(self, blur_prob=0.5, blur_kernel_range=(0, 2)):
        self.blur_prob = blur_prob
        self.kernel_range = blur_kernel_range

    def __call__(self, sample):
        """Replace ``sample['image']`` with a (possibly blurred) copy and return ``sample``."""
        # Smooth image
        image = np.array(sample['image'])
        if np.random.uniform(0.0, 1.0) < self.blur_prob:
            low, high = self.kernel_range[0], self.kernel_range[1]
            # np.random.random_integers is deprecated; randint with an
            # exclusive upper bound of high + 1 draws from the same
            # inclusive [low, high] range.
            kernel = np.random.randint(low, int(high) + 1) * 2 + 1
            image = cv2.GaussianBlur(image, (kernel, kernel), 0, 0)
        sample['image'] = Image.fromarray(image)

        return sample