import os

import cv2
import torch
import numpy as np

from .dwpose import util
from .dwpose.wholebody import Wholebody, HWC3, resize_image
from .utils import convert_to_numpy

# Tolerate duplicate OpenMP runtimes (PyTorch and OpenCV may each bundle one).
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


def draw_pose(pose, H, W, use_hand=False, use_body=False, use_face=False):
    """Render the selected pose components onto a black (H, W) canvas."""
    bodies = pose['bodies']
    faces = pose['faces']
    hands = pose['hands']
    candidate = bodies['candidate']
    subset = bodies['subset']
    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)

    if use_body:
        canvas = util.draw_bodypose(canvas, candidate, subset)
    if use_hand:
        canvas = util.draw_handpose(canvas, hands)
    if use_face:
        canvas = util.draw_facepose(canvas, faces)

    return canvas


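# The drawing helpers in .dwpose.util expect keypoints normalized to [0, 1] and
# scale them back up by the canvas size; PoseAnnotator.process performs that
# normalization. Keypoint layout (DWPose whole-body converted to OpenPose order,
# 134 points): 0-17 body, 18-23 feet, 24-91 face, 92-112 left hand,
# 113-133 right hand.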
class PoseAnnotator:
    def __init__(self, cfg, device=None):
        onnx_det = cfg['DETECTION_MODEL']
        onnx_pose = cfg['POSE_MODEL']
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device
        self.pose_estimation = Wholebody(onnx_det, onnx_pose, device=self.device)
        self.resize_size = cfg.get("RESIZE_SIZE", 1024)
        self.use_body = cfg.get('USE_BODY', True)
        self.use_face = cfg.get('USE_FACE', True)
        self.use_hand = cfg.get('USE_HAND', True)

    @torch.inference_mode()
    def forward(self, image):
        image = convert_to_numpy(image)
        # Flip the channel order (RGB <-> BGR) and ensure a 3-channel uint8 image.
        input_image = HWC3(image[..., ::-1])
        return self.process(resize_image(input_image, self.resize_size), image.shape[:2])

    def process(self, ori_img, ori_shape):
        ori_h, ori_w = ori_shape
        ori_img = ori_img.copy()
        H, W, C = ori_img.shape
        with torch.no_grad():
            candidate, subset, det_result = self.pose_estimation(ori_img)
            nums, keys, locs = candidate.shape
            # Normalize keypoint coordinates to [0, 1] for the drawing helpers.
            candidate[..., 0] /= float(W)
            candidate[..., 1] /= float(H)
            body = candidate[:, :18].copy()
            body = body.reshape(nums * 18, locs)
            # Convert confidence scores to the OpenPose subset convention:
            # confident keypoints (> 0.3) store their index into the flattened
            # `body` array; the rest are marked -1 (missing).
            score = subset[:, :18]
            for i in range(len(score)):
                for j in range(len(score[i])):
                    if score[i][j] > 0.3:
                        score[i][j] = int(18 * i + j)
                    else:
                        score[i][j] = -1

            # Hide low-confidence keypoints everywhere else as well.
            un_visible = subset < 0.3
            candidate[un_visible] = -1

            foot = candidate[:, 18:24]    # 6 foot keypoints (extracted but unused)
            faces = candidate[:, 24:92]   # 68 face landmarks
            # Stack left (92:113) and right (113:) hands, 21 keypoints each.
            hands = candidate[:, 92:113]
            hands = np.vstack([hands, candidate[:, 113:]])

            bodies = dict(candidate=body, subset=score)
            pose = dict(bodies=bodies, hands=hands, faces=faces)

            # Draw each requested keypoint combination, then resize the map back
            # to the original resolution (flipping RGB <-> BGR in the process).
            def resize_back(detected_map):
                interp = cv2.INTER_LANCZOS4 if ori_h * ori_w > H * W else cv2.INTER_AREA
                return cv2.resize(detected_map[..., ::-1], (ori_w, ori_h), interpolation=interp)

            ret_data = {}
            if self.use_body:
                ret_data["detected_map_body"] = resize_back(draw_pose(pose, H, W, use_body=True))
            if self.use_face:
                ret_data["detected_map_face"] = resize_back(draw_pose(pose, H, W, use_face=True))
            if self.use_body and self.use_face:
                ret_data["detected_map_bodyface"] = resize_back(
                    draw_pose(pose, H, W, use_body=True, use_face=True))
            if self.use_hand and self.use_body and self.use_face:
                ret_data["detected_map_handbodyface"] = resize_back(
                    draw_pose(pose, H, W, use_hand=True, use_body=True, use_face=True))

            if det_result.shape[0] > 0:
                # Map detection boxes back to the original resolution. Boxes are
                # assumed to be in (x1, y1, x2, y2) order, so even indices are x
                # coordinates (scaled by width) and odd indices are y (by height).
                w_ratio, h_ratio = ori_w / W, ori_h / H
                det_result[..., ::2] *= w_ratio
                det_result[..., 1::2] *= h_ratio
                det_result = det_result.astype(np.int32)
            return ret_data, det_result


class PoseBodyFaceAnnotator(PoseAnnotator):
    def __init__(self, cfg):
        super().__init__(cfg)
        self.use_body, self.use_face, self.use_hand = True, True, False

    @torch.inference_mode()
    def forward(self, image):
        ret_data, _ = super().forward(image)
        return ret_data['detected_map_bodyface']


class PoseBodyFaceVideoAnnotator(PoseBodyFaceAnnotator):
    def forward(self, frames):
        ret_frames = []
        for frame in frames:
            anno_frame = super().forward(np.array(frame))
            ret_frames.append(anno_frame)
        return ret_frames
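

# A minimal usage sketch, not part of the module: the config keys match what
# PoseAnnotator reads in __init__, but the ONNX paths and input image below are
# placeholders. Because this file uses package-relative imports, run it as a
# module (e.g. `python -m <package>.pose`) rather than as a script.
if __name__ == "__main__":
    example_cfg = {
        'DETECTION_MODEL': 'models/yolox_l.onnx',      # placeholder path
        'POSE_MODEL': 'models/dw-ll_ucoco_384.onnx',   # placeholder path
        'RESIZE_SIZE': 1024,
    }
    annotator = PoseAnnotator(example_cfg)
    image = cv2.imread('example.jpg')                  # placeholder input
    pose_maps, det_boxes = annotator.forward(image)
    for name, canvas in pose_maps.items():
        cv2.imwrite(f'{name}.png', canvas)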