File size: 7,711 Bytes
d015578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import os
import pkg_resources
import numpy as np
import cv2

# My libs
from spiga.data.loaders.augmentors.utils import rotation_matrix_to_euler

# Model file nomenclature: path template for the packaged 3D mean-face model.
# '{num_ldm}' is filled with the landmark count of the target database
# (e.g. mean_face_3D_68.txt for a 68-landmark configuration).
# NOTE(review): pkg_resources is deprecated in modern setuptools — consider
# importlib.resources once the supported Python floor allows it.
model_file_dft = pkg_resources.resource_filename('spiga', 'data/models3D') + '/mean_face_3D_{num_ldm}.txt'


class PositPose:
    """Head-pose annotator based on the POSIT algorithm.

    Pairs the labelled 2D landmarks of a sample with a 3D mean-face model,
    recovers a rotation/translation, and writes the results back into the
    sample dictionary (``cam_matrix``, ``model3d``, ``pose``,
    ``model3d_proj``).
    """

    def __init__(self, ldm_ids, focal_ratio=1, selected_ids=None, max_iter=100,
                  fix_bbox=True, model_file=model_file_dft):

        # 3D mean-face model matching the database landmark ids
        model3d_world, model3d_ids = self._load_world_shape(ldm_ids, model_file)

        # Binary mask selecting the landmarks robust enough to feed POSIT
        if selected_ids is None:
            model3d_mask = np.ones(len(ldm_ids))
        else:
            model3d_mask = np.zeros(len(ldm_ids))
            for idx, model_id in enumerate(model3d_ids):
                if model_id in selected_ids:
                    model3d_mask[idx] = 1

        self.ldm_ids = ldm_ids                  # Ids from the database
        self.model3d_world = model3d_world      # Model data
        self.model3d_ids = model3d_ids          # Model ids
        self.model3d_mask = model3d_mask        # Model mask ids
        self.max_iter = max_iter                # Refinement iterations
        self.focal_ratio = focal_ratio          # Camera matrix focal length ratio
        self.fix_bbox = fix_bbox                # Camera matrix centered on image (False to centered on bbox)

    def __call__(self, sample):
        """Estimate the head pose of ``sample`` and annotate it in place."""
        landmarks = sample['landmarks']
        mask = sample['mask_ldm']

        # Intrinsics are sized from the image (or the feature map, when a
        # scale factor to it is provided)
        img_shape = np.array(sample['image'].shape)[0:2]
        if 'img2map_scale' in sample:
            img_shape = img_shape * sample['img2map_scale']

        if self.fix_bbox:
            # Shapes given are inverted (y, x)
            cam_matrix = self._camera_matrix([0, 0, img_shape[1], img_shape[0]])
        else:
            # Scale error when ftshape and img_shape mismatch
            cam_matrix = self._camera_matrix(sample['bbox'])

        # Save intrinsic matrix and 3D model landmarks
        sample['cam_matrix'] = cam_matrix
        sample['model3d'] = self.model3d_world

        world_pts, image_pts = self._set_correspondences(landmarks, mask)

        if image_pts.shape[0] < 4:
            # POSIT needs at least four correspondences; fall back to identity
            print('POSIT does not work without landmarks')
            rot_matrix = np.eye(3, dtype=float)
            trl_matrix = np.array([0, 0, 0])
        else:
            rot_matrix, trl_matrix = self._modern_posit(world_pts, image_pts, cam_matrix)

        euler = rotation_matrix_to_euler(rot_matrix)
        sample['pose'] = np.hstack((euler[:3], trl_matrix[:3]))
        sample['model3d_proj'] = self._project_points(rot_matrix, trl_matrix, cam_matrix, norm=img_shape)
        return sample

    def _load_world_shape(self, ldm_ids, model_file):
        """Delegate to the module-level model loader."""
        return load_world_shape(ldm_ids, model_file=model_file)

    def _camera_matrix(self, bbox):
        """Build a pinhole intrinsic matrix centered on ``bbox`` (x, y, w, h)."""
        fx = bbox[2] * self.focal_ratio
        fy = bbox[3] * self.focal_ratio
        cx = bbox[0] + (bbox[2] * 0.5)
        cy = bbox[1] + (bbox[3] * 0.5)
        return np.array([[fx, 0, cx],
                         [0, fy, cy],
                         [0, 0, 1]])

    def _set_correspondences(self, landmarks, mask):
        """Keep only landmarks that are both labelled and model-robust."""
        valid = np.logical_and(mask, self.model3d_mask).astype(bool)
        return self.model3d_world[valid], landmarks[valid]

    def _modern_posit(self, world_pts, image_pts, cam_matrix):
        """Delegate to the module-level POSIT solver."""
        return modern_posit(world_pts, image_pts, cam_matrix, self.max_iter)

    def _project_points(self, rot, trl, cam_matrix, norm=None):
        """Project the 3D model with pose (rot, trl); optionally divide by ``norm``."""
        # Projection matrix P = K [R | t]
        extrinsics = np.concatenate((rot, np.reshape(trl, (3, 1))), axis=1)
        proj_matrix = np.matmul(cam_matrix, extrinsics)

        # Homogeneous model landmarks
        pts = self.model3d_world
        pts_hom = np.concatenate((pts, np.ones((pts.shape[0], 1))), axis=1)

        # Perspective projection, normalized by the homogeneous coordinate (lambda = 1)
        pts_proj = np.matmul(proj_matrix, pts_hom.T).T
        pts_proj = pts_proj / pts_proj[:, 2:3]

        if norm is not None:
            pts_proj[:, 0] /= norm[0]
            pts_proj[:, 1] /= norm[1]
        return pts_proj[:, :-1]


def load_world_shape(db_landmarks, model_file=None):
    """Load the 3D mean-face model matching a database landmark layout.

    Args:
        db_landmarks: List of database landmark ids. Its length selects the
            model file and its order defines the output row order.
        model_file: Path template containing a ``{num_ldm}`` placeholder.
            Defaults to the packaged ``model_file_dft`` template (resolved
            lazily so the default is picked up at call time).

    Returns:
        Tuple ``(world, ids)``: ``world`` is an (N, 3) array of 3D points in
        camera-style coordinates and ``ids`` the landmark id stored per row.

    Raises:
        ValueError: If no model file exists for this landmark count.
    """
    if model_file is None:
        model_file = model_file_dft

    # Resolve the model file from the number of landmarks
    num_ldm = len(db_landmarks)
    filename = model_file.format(num_ldm=num_ldm)
    if not os.path.exists(filename):
        raise ValueError('No 3D model found for %i landmarks' % num_ldm)

    # Column 0 holds the landmark id, columns 1-3 its 3D coordinates
    posit_landmarks = np.genfromtxt(filename, delimiter='|', dtype=int, usecols=0).tolist()
    mean_face_3D = np.genfromtxt(filename, delimiter='|', dtype=(float, float, float), usecols=(1, 2, 3)).tolist()
    world_all = len(mean_face_3D)*[None]
    index_all = len(mean_face_3D)*[None]

    for cont, elem in enumerate(mean_face_3D):
        # Axis remap from the file convention to (z, -x, -y)
        pt3d = [elem[2], -elem[0], -elem[1]]
        # Place the point at the row matching the database landmark order
        lnd_idx = db_landmarks.index(posit_landmarks[cont])
        world_all[lnd_idx] = pt3d
        index_all[lnd_idx] = posit_landmarks[cont]

    return np.array(world_all), np.array(index_all)


def modern_posit(world_pts, image_pts, cam_matrix, max_iters):
    """Estimate pose from 2D-3D correspondences with the POSIT algorithm.

    Starting from a scaled-orthographic approximation, iteratively refines
    the perspective correction term until the reprojected update becomes
    negligible or ``max_iters`` is reached.

    Args:
        world_pts: (N, 3) model points in object coordinates.
        image_pts: (N, 2) matching image points in pixels.
        cam_matrix: 3x3 intrinsic camera matrix.
        max_iters: Maximum number of refinement iterations.

    Returns:
        Tuple ``(rot_matrix, trl_matrix)``: 3x3 rotation matrix and
        length-3 translation [Tx, Ty, Tz].
    """
    # Homogeneous world points and their pseudo-inverse (the object matrix)
    num_landmarks = image_pts.shape[0]
    ones = np.ones((num_landmarks, 1))
    A = np.concatenate((world_pts, ones), axis=1)
    B = np.linalg.pinv(A)

    # Normalize image points to the unit focal-length camera
    focal_length = cam_matrix[0, 0]
    img_center = (cam_matrix[0, 2], cam_matrix[1, 2])
    centered_pts = np.zeros((num_landmarks, 2))
    centered_pts[:, 0] = (image_pts[:, 0] - img_center[0]) / focal_length
    centered_pts[:, 1] = (image_pts[:, 1] - img_center[1]) / focal_length
    Ui = centered_pts[:, 0]
    Vi = centered_pts[:, 1]

    # POSIT loop
    Tx, Ty, Tz = 0.0, 0.0, 0.0
    r1, r2, r3 = np.zeros(3), np.zeros(3), np.zeros(3)
    for iteration in range(max_iters):  # renamed from 'iter' (shadowed builtin)
        I = np.dot(B, Ui)
        J = np.dot(B, Vi)

        # Scale from the geometric average of the row norms
        # (classic POSIT uses the arithmetic average)
        normI = 1.0 / np.linalg.norm(I[0:3])
        normJ = 1.0 / np.linalg.norm(J[0:3])
        Tz = np.sqrt(normI * normJ)
        r1N = I * Tz
        r2N = J * Tz
        # Clip so noisy data cannot push the direction cosines out of range
        r1 = np.clip(r1N[0:3], -1, 1)
        r2 = np.clip(r2N[0:3], -1, 1)
        r3 = np.cross(r1, r2)
        r3T = np.concatenate((r3, [Tz]), axis=0)
        Tx = r1N[3]
        Ty = r2N[3]

        # Compute epsilon, update Ui and Vi and check convergence
        eps = np.dot(A, r3T) / Tz
        oldUi = Ui
        oldVi = Vi
        Ui = eps * centered_pts[:, 0]
        Vi = eps * centered_pts[:, 1]
        deltaUi = Ui - oldUi
        deltaVi = Vi - oldVi
        # Pixel-space magnitude of the update; converged when negligible
        delta = focal_length * focal_length * (np.dot(deltaUi, deltaUi) + np.dot(deltaVi, deltaVi))
        if iteration > 0 and delta < 0.01:  # converged
            break

    rot_matrix = np.array([r1, r2, r3])
    trl_matrix = np.array([Tx, Ty, Tz])
    # Snap to the nearest orthogonal rotation matrix: for R = U*D*Vt the
    # orthogonal polar factor is U*Vt. (np.linalg.svd replaces the previous
    # cv2.SVDecomp call; the identity factor in the old product was a no-op.)
    u, _, vt = np.linalg.svd(rot_matrix)
    rot_matrix = np.matmul(u, vt)
    return rot_matrix, trl_matrix