import os
import pkg_resources
import numpy as np
import cv2

# My libs
from spiga.data.loaders.augmentors.utils import rotation_matrix_to_euler

# Model file nomenclature
model_file_dft = pkg_resources.resource_filename('spiga', 'data/models3D') + '/mean_face_3D_{num_ldm}.txt'


class PositPose:
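    """Estimate head pose from labelled 2D landmarks with the POSIT algorithm,
    using a 3D mean-face model as the rigid reference shape. Called on a
    sample dict, it adds the keys 'cam_matrix', 'model3d', 'pose' and
    'model3d_proj'.
    """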
    def __init__(self, ldm_ids, focal_ratio=1, selected_ids=None, max_iter=100,
                 fix_bbox=True, model_file=model_file_dft):

        # Load 3D face model
        model3d_world, model3d_ids = self._load_world_shape(ldm_ids, model_file)

        # Generate id mask to pick only the robust landmarks for POSIT
        if selected_ids is None:
            model3d_mask = np.ones(len(ldm_ids))
        else:
            model3d_mask = np.zeros(len(ldm_ids))
            for index, posit_id in enumerate(model3d_ids):
                if posit_id in selected_ids:
                    model3d_mask[index] = 1

        self.ldm_ids = ldm_ids              # Ids from the database
        self.model3d_world = model3d_world  # Model data
        self.model3d_ids = model3d_ids      # Model ids
        self.model3d_mask = model3d_mask    # Model mask ids
        self.max_iter = max_iter            # Refinement iterations
        self.focal_ratio = focal_ratio      # Camera matrix focal length ratio
        self.fix_bbox = fix_bbox            # Center camera matrix on the image (False to center it on the bbox)
    def __call__(self, sample):
        landmarks = sample['landmarks']
        mask = sample['mask_ldm']

        # Camera matrix
        img_shape = np.array(sample['image'].shape)[0:2]
        if 'img2map_scale' in sample:
            img_shape = img_shape * sample['img2map_scale']
        if self.fix_bbox:
            img_bbox = [0, 0, img_shape[1], img_shape[0]]  # Shapes given are inverted (y, x)
            cam_matrix = self._camera_matrix(img_bbox)
        else:
            bbox = sample['bbox']  # Scale error when ftshape and img_shape mismatch
            cam_matrix = self._camera_matrix(bbox)

        # Save intrinsic matrix and 3D model landmarks
        sample['cam_matrix'] = cam_matrix
        sample['model3d'] = self.model3d_world

        world_pts, image_pts = self._set_correspondences(landmarks, mask)
        if image_pts.shape[0] < 4:
            print('POSIT requires at least four landmark correspondences')
            rot_matrix, trl_matrix = np.eye(3, dtype=float), np.zeros(3)
        else:
            rot_matrix, trl_matrix = self._modern_posit(world_pts, image_pts, cam_matrix)

        euler = rotation_matrix_to_euler(rot_matrix)
        sample['pose'] = np.array([euler[0], euler[1], euler[2], trl_matrix[0], trl_matrix[1], trl_matrix[2]])
        sample['model3d_proj'] = self._project_points(rot_matrix, trl_matrix, cam_matrix, norm=img_shape)
        return sample
    def _load_world_shape(self, ldm_ids, model_file):
        return load_world_shape(ldm_ids, model_file=model_file)
    def _camera_matrix(self, bbox):
        focal_length_x = bbox[2] * self.focal_ratio
        focal_length_y = bbox[3] * self.focal_ratio
        face_center = (bbox[0] + (bbox[2] * 0.5)), (bbox[1] + (bbox[3] * 0.5))

        cam_matrix = np.array([[focal_length_x, 0, face_center[0]],
                               [0, focal_length_y, face_center[1]],
                               [0, 0, 1]])
        return cam_matrix
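    # Note: the intrinsics above follow the standard pinhole model
    #   K = [[fx, 0, cx],
    #        [0, fy, cy],
    #        [0,  0,  1]]
    # with the focal lengths (fx, fy) proportional to the bbox size through
    # focal_ratio, and the principal point (cx, cy) at the bbox center.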
    def _set_correspondences(self, landmarks, mask):
        # Correspondences using labelled and robust landmarks
        img_mask = np.logical_and(mask, self.model3d_mask)
        img_mask = img_mask.astype(bool)
        image_pts = landmarks[img_mask]
        world_pts = self.model3d_world[img_mask]
        return world_pts, image_pts
    def _modern_posit(self, world_pts, image_pts, cam_matrix):
        return modern_posit(world_pts, image_pts, cam_matrix, self.max_iter)
    def _project_points(self, rot, trl, cam_matrix, norm=None):
        # Perspective projection model
        trl = np.expand_dims(trl, 1)
        extrinsics = np.concatenate((rot, trl), 1)
        proj_matrix = np.matmul(cam_matrix, extrinsics)

        # Homogeneous landmarks
        pts = self.model3d_world
        ones = np.ones(pts.shape[0])
        ones = np.expand_dims(ones, 1)
        pts_hom = np.concatenate((pts, ones), 1)

        # Project landmarks
        pts_proj = np.matmul(proj_matrix, pts_hom.T).T
        pts_proj = pts_proj / np.expand_dims(pts_proj[:, 2], 1)  # Lambda = 1
        if norm is not None:
            pts_proj[:, 0] /= norm[0]
            pts_proj[:, 1] /= norm[1]
        return pts_proj[:, :-1]
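    # Note: _project_points implements the standard perspective projection
    #   s * [u, v, 1]^T = K @ [R | t] @ [X, Y, Z, 1]^T
    # where K comes from _camera_matrix() and [R | t] from POSIT. Dividing by
    # the third homogeneous coordinate removes the depth scale s (Lambda = 1).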


def load_world_shape(db_landmarks, model_file=model_file_dft):
    # Load 3D mean face coordinates (one 'id|x|y|z' row per landmark)
    num_ldm = len(db_landmarks)
    filename = model_file.format(num_ldm=num_ldm)
    if not os.path.exists(filename):
        raise ValueError('No 3D model found for %i landmarks' % num_ldm)

    posit_landmarks = np.genfromtxt(filename, delimiter='|', dtype=int, usecols=0).tolist()
    mean_face_3D = np.genfromtxt(filename, delimiter='|', dtype=float, usecols=(1, 2, 3)).tolist()

    world_all = len(mean_face_3D) * [None]
    index_all = len(mean_face_3D) * [None]
    for cont, elem in enumerate(mean_face_3D):
        # Reorder the model axes to match the camera convention
        pt3d = [elem[2], -elem[0], -elem[1]]
        lnd_idx = db_landmarks.index(posit_landmarks[cont])
        world_all[lnd_idx] = pt3d
        index_all[lnd_idx] = posit_landmarks[cont]
    return np.array(world_all), np.array(index_all)


def modern_posit(world_pts, image_pts, cam_matrix, max_iters):
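    """Recover the rotation matrix and translation vector that map the 3D
    model points onto the 2D image points.

    POSIT (DeMenthon and Davis, "Model-Based Object Pose in 25 Lines of
    Code") starts from a scaled-orthographic approximation of the camera
    and iteratively refines it towards the full perspective solution.
    """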
    # Homogeneous world points
    num_landmarks = image_pts.shape[0]
    one = np.ones((num_landmarks, 1))
    A = np.concatenate((world_pts, one), axis=1)
    B = np.linalg.pinv(A)

    # Normalize image points
    focal_length = cam_matrix[0, 0]
    img_center = (cam_matrix[0, 2], cam_matrix[1, 2])
    centered_pts = np.zeros((num_landmarks, 2))
    centered_pts[:, 0] = (image_pts[:, 0] - img_center[0]) / focal_length
    centered_pts[:, 1] = (image_pts[:, 1] - img_center[1]) / focal_length
    Ui = centered_pts[:, 0]
    Vi = centered_pts[:, 1]

    # POSIT loop
    Tx, Ty, Tz = 0.0, 0.0, 0.0
    r1, r2, r3 = [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]
    for it in range(max_iters):
        I = np.dot(B, Ui)
        J = np.dot(B, Vi)

        # Estimate translation vector and rotation matrix
        normI = 1.0 / np.sqrt(I[0] * I[0] + I[1] * I[1] + I[2] * I[2])
        normJ = 1.0 / np.sqrt(J[0] * J[0] + J[1] * J[1] + J[2] * J[2])
        Tz = np.sqrt(normI * normJ)  # Geometric average instead of the arithmetic average of classic POSIT
        r1N = I * Tz
        r2N = J * Tz
        r1 = np.clip(r1N[0:3], -1, 1)
        r2 = np.clip(r2N[0:3], -1, 1)
        r3 = np.cross(r1, r2)
        r3T = np.concatenate((r3, [Tz]), axis=0)
        Tx = r1N[3]
        Ty = r2N[3]

        # Compute epsilon, update Ui and Vi and check convergence
        eps = np.dot(A, r3T) / Tz
        oldUi = Ui
        oldVi = Vi
        Ui = np.multiply(eps, centered_pts[:, 0])
        Vi = np.multiply(eps, centered_pts[:, 1])
        deltaUi = Ui - oldUi
        deltaVi = Vi - oldVi
        delta = focal_length * focal_length * (np.dot(deltaUi, deltaUi) + np.dot(deltaVi, deltaVi))
        if it > 0 and delta < 0.01:  # Converged
            break

    rot_matrix = np.array([r1, r2, r3])
    trl_matrix = np.array([Tx, Ty, Tz])

    # Snap to the nearest orthogonal rotation matrix: R = U*D*Vt with the
    # singular values D replaced by the identity
    w, u, vt = cv2.SVDecomp(rot_matrix)
    rot_matrix = np.matmul(u, vt)
    return rot_matrix, trl_matrix
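

# Minimal usage sketch (illustrative only): the landmark ids, image and 2D
# points below are placeholders, and it assumes a matching
# 'mean_face_3D_98.txt' model file ships with the package.
if __name__ == '__main__':
    ldm_ids = list(range(98))                    # hypothetical database landmark ids
    posit = PositPose(ldm_ids, focal_ratio=1.5)  # focal_ratio value is illustrative
    sample = {
        'image': np.zeros((256, 256, 3), dtype=np.uint8),  # placeholder image
        'landmarks': np.random.rand(98, 2) * 256,          # placeholder 2D landmarks
        'mask_ldm': np.ones(98, dtype=bool),               # all landmarks labelled
    }
    sample = posit(sample)
    print(sample['pose'])  # [euler angles (3), translation (3)]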