from lib.kits.basic import *

import cv2
import imageio

from lib.utils.data import to_numpy
from lib.utils.vis import ColorPalette

def annotate_img(
    img  : np.ndarray,
    text : str,
    pos  : Union[str, Tuple[int, int]] = 'bl',
):
    '''
    Annotate the image with the given text.

    ### Args
    - img: np.ndarray, (H, W, 3)
    - text: str
    - pos: str or tuple(int, int), default 'bl'
        - If str, one of ['tl', 'bl'].
        - If tuple, the position of the text.

    ### Returns
    - np.ndarray, (H, W, 3)
        - The annotated image.
    '''
    assert len(img.shape) == 3, 'img must have 3 dimensions.'
    return annotate_video(frames=img[None], text=text, pos=pos)[0]
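
# Usage sketch (hypothetical, not executed): annotate a single RGB frame loaded elsewhere,
# e.g. `img = imageio.imread('frame.png')` for a (H, W, 3) uint8 array (assumed layout).
#   img = annotate_img(img, 'subject #3', pos='tl')        # preset top-left corner
#   img = annotate_img(img, 'frame 0042', pos=(20, 460))   # explicit pixel position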

def annotate_video(
    frames : np.ndarray,
    text   : str,
    pos    : Union[str, Tuple[int, int]] = 'bl',
    alpha  : float = 0.75,
):
    '''
    Annotate the video frames with the given text.

    ### Args
    - frames: np.ndarray, (L, H, W, 3)
    - text: str
    - pos: str or tuple(int, int), default 'bl'
        - If str, one of ['tl', 'bl'].
        - If tuple, the position of the text.
    - alpha: float, default 0.75
        - The opacity of the text overlay (1.0 means fully opaque).

    ### Returns
    - np.ndarray, (L, H, W, 3)
        - The annotated video.
    '''
    assert len(frames.shape) == 4, 'frames must have 4 dimensions.'
    frames = frames.copy()
    L, H, W = frames.shape[:3]

    if isinstance(pos, str):
        if pos == 'tl':
            offset = (int(0.1 * W), int(0.1 * H))
        elif pos == 'bl':
            offset = (int(0.1 * W), int(0.9 * H))
        else:
            raise ValueError(f'Invalid position: {pos}')
    else:
        offset = pos

    for i, frame in enumerate(frames):
        overlay = frame.copy()
        _put_text(overlay, text, offset)
        frames[i] = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)
    return frames
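
# Usage sketch (hypothetical, not executed): label every frame of a clip and write it back
# out with imageio. The clip layout (L, H, W, 3) uint8 and the file names are assumptions.
#   frames = np.stack([imageio.imread(p) for p in frame_paths])  # (L, H, W, 3)
#   frames = annotate_video(frames, 'take 01', pos='bl', alpha=0.75)
#   imageio.mimsave('annotated.mp4', list(frames), fps=30)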

def draw_bbx_on_img(
    img   : np.ndarray,
    lurb  : np.ndarray,
    color : str = 'red',
):
    '''
    Draw the bounding box on the image.

    ### Args
    - img: np.ndarray, (H, W, 3)
    - lurb: np.ndarray, (4,)
        - The bounding box in the format of left, up, right, bottom.
    - color: str, default 'red'

    ### Returns
    - np.ndarray, (H, W, 3)
        - The image with the bounding box.
    '''
    assert len(img.shape) == 3, 'img must have 3 dimensions.'
    img = img.copy()
    l, u, r, b = lurb.astype(int)
    color_rgb_int8 = ColorPalette.presets_int8[color]
    cv2.rectangle(img, (l, u), (r, b), color_rgb_int8, 3)
    return img
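
# Usage sketch (hypothetical, not executed): draw a detection box given as (left, up, right, bottom).
# The box values are made up; the color name must exist in `ColorPalette.presets_int8`.
#   lurb = np.array([64, 32, 256, 384])
#   img  = draw_bbx_on_img(img, lurb, color='red')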

def draw_bbx_on_video(
    frames : np.ndarray,
    lurbs  : np.ndarray,
    color  : str = 'red',
):
    '''
    Draw the bounding boxes on the video frames.

    ### Args
    - frames: np.ndarray, (L, H, W, 3)
    - lurbs: np.ndarray, (L, 4)
        - The per-frame bounding boxes in the format of left, up, right, bottom.
    - color: str, default 'red'

    ### Returns
    - np.ndarray, (L, H, W, 3)
        - The video with the bounding boxes.
    '''
    assert len(frames.shape) == 4, 'frames must have 4 dimensions.'
    frames = frames.copy()
    for i, frame in enumerate(frames):
        frames[i] = draw_bbx_on_img(frame, lurbs[i], color)
    return frames
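
# Usage sketch (hypothetical, not executed): overlay one tracked box per frame.
# `lurbs` must be aligned with `frames` along the first axis, i.e. shaped (L, 4).
#   frames = draw_bbx_on_video(frames, lurbs, color='red')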

def draw_kp2d_on_img(
    img         : np.ndarray,
    kp2d        : Union[np.ndarray, torch.Tensor],
    links       : list = [],
    link_colors : list = [],
    show_conf   : bool = False,
    show_idx    : bool = False,
):
    '''
    Draw the 2D keypoints (and connection lines, if any) on the image.

    ### Args
    - img: np.ndarray, (H, W, 3)
        - The image.
    - kp2d: np.ndarray or torch.Tensor, (N, 2) or (N, 3)
        - The 2D keypoints, without or with confidence.
    - links: list of [int, int] or (int, int), default []
        - The connections between keypoints. Each element is a pair of keypoint indices.
        - If empty, only keypoints will be drawn.
    - link_colors: list of [int, int, int] or (int, int, int), default []
        - The colors of the connections.
        - If empty, the connections will be drawn in white.
    - show_conf: bool, default False
        - Whether to show the confidence of keypoints.
    - show_idx: bool, default False
        - Whether to show the index of keypoints.

    ### Returns
    - img: np.ndarray, (H, W, 3)
        - The image with the skeleton.
    '''
    img = img.copy()
    kp2d = to_numpy(kp2d)  # (N, 2) or (N, 3)
    assert len(img.shape) == 3, f'`img`\'s shape should be (H, W, 3) but got {img.shape}'
    assert len(kp2d.shape) == 2, f'`kp2d`\'s shape should be (N, 2) or (N, 3) but got {kp2d.shape}'

    if kp2d.shape[1] == 2:
        # Pad a confidence of 1.0 if only coordinates are given.
        kp2d = np.concatenate([kp2d, np.ones((kp2d.shape[0], 1))], axis=-1)  # (N, 3)
    kp_has_drawn = [False] * kp2d.shape[0]

    # Draw connections.
    for lid, link in enumerate(links):
        # Skip links that touch a low-confidence keypoint (confidence < 0.5).
        if kp2d[link[0], 2] < 0.5 or kp2d[link[1], 2] < 0.5:
            continue
        pt1 = tuple(kp2d[link[0], :2].astype(int))
        pt2 = tuple(kp2d[link[1], :2].astype(int))
        color = (255, 255, 255) if len(link_colors) == 0 else tuple(link_colors[lid])
        cv2.line(img, pt1, pt2, color, 2)
        if not kp_has_drawn[link[0]]:
            cv2.circle(img, pt1, 3, color, -1)
        if not kp_has_drawn[link[1]]:
            cv2.circle(img, pt2, 3, color, -1)
        kp_has_drawn[link[0]] = kp_has_drawn[link[1]] = True

    # Draw the remaining keypoints and annotate the confidence / index.
    for i, kp in enumerate(kp2d):
        conf = kp[2]
        pos = tuple(kp[:2].astype(int))
        if not kp_has_drawn[i]:
            cv2.circle(img, pos, 4, (255, 255, 255), -1)
            cv2.circle(img, pos, 2, (  0, 255,   0), -1)
            kp_has_drawn[i] = True
        if show_conf:
            _put_text(img, f'{conf:.2f}', pos)
        if show_idx:
            # Only the first 40 indices are annotated to keep the image readable.
            if i >= 40:
                continue
            _put_text(img, f'{i}', pos, scale=0.03)
    return img
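
# Usage sketch (hypothetical, not executed): draw a tiny 3-joint chain with per-joint confidence.
# The keypoints, links, and colors below are made-up illustration values, not a real skeleton.
#   kp2d  = np.array([[120, 80, 0.9], [140, 160, 0.8], [150, 240, 0.3]])  # (N, 3)
#   links = [(0, 1), (1, 2)]  # joint-index pairs; (1, 2) is skipped since one end has conf < 0.5
#   img   = draw_kp2d_on_img(img, kp2d, links=links, link_colors=[(255, 0, 0), (0, 0, 255)],
#                            show_conf=True, show_idx=True)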

# ====== Internal Utils ======

def _put_text(
    img          : np.ndarray,
    text         : str,
    pos          : Tuple[int, int],
    scale        : float = 0.05,
    color_inside : Tuple[int, int, int] = ColorPalette.presets_int8['black'],
    color_stroke : Tuple[int, int, int] = ColorPalette.presets_int8['white'],
    **kwargs
):
    fontFace = kwargs.pop('fontFace', cv2.FONT_HERSHEY_SIMPLEX)
    H, W = img.shape[:2]
    # Scale the font relative to the image size: https://stackoverflow.com/a/55772676/22331129
    font_scale = scale * min(H, W) / 25 * 1.5
    thickness_inside = max(int(font_scale), 1)
    thickness_stroke = max(int(font_scale * 6), 6)

    # Measure the text to get the line height for multi-line rendering.
    # https://stackoverflow.com/questions/73664883/opencv-python-draw-text-with-fontsize-in-pixels
    ((fw, fh), baseline) = cv2.getTextSize(
        text      = text,
        fontFace  = fontFace,
        fontScale = font_scale,
        thickness = thickness_stroke,
    )
    lines = text.split('\n')
    line_height = baseline + fh

    for i, line in enumerate(lines):
        pos_ = (pos[0], pos[1] + line_height * i)
        # Draw the stroke first, then the inner text on top of it.
        cv2.putText(img, line, pos_, fontFace, font_scale, color_stroke, thickness_stroke)
        cv2.putText(img, line, pos_, fontFace, font_scale, color_inside, thickness_inside)
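

# ====== Smoke Test (Sketch) ======
# A minimal, self-contained check on synthetic data. It only assumes that the imports at the
# top of this file resolve as expected (in particular that `np` is re-exported by
# `lib.kits.basic` and that 'red' exists in `ColorPalette.presets_int8`); no assets required.
if __name__ == '__main__':
    canvas = np.zeros((240, 320, 3), dtype=np.uint8)  # blank (H, W, 3) uint8 image
    canvas = annotate_img(canvas, 'hello\nworld', pos='tl')
    canvas = draw_bbx_on_img(canvas, np.array([40, 40, 280, 200]), color='red')
    kp2d = np.array([[80, 60, 1.0], [160, 120, 1.0], [240, 180, 0.2]])  # (N, 3) with confidence
    canvas = draw_kp2d_on_img(canvas, kp2d, links=[(0, 1), (1, 2)], show_conf=True)
    print('Smoke test finished, output shape:', canvas.shape)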