|
from mmdet.registry import TRANSFORMS |
|
import mmcv |
|
import numpy as np |
|
from mmdet.structures.bbox.box_type import autocast_box_type |
|
from mmcv.image.geometric import _scale_size |
|
from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union |
|
from mmcv.transforms.base import BaseTransform |
|
|
|
|
|
|
|
def rescale_size(old_size: tuple,
                 scale: Union[float, int, tuple],
                 return_scale: bool = False) -> tuple:
    """Compute the target size an image should be rescaled to.

    Args:
        old_size (tuple[int]): The old size (w, h) of the image.
        scale (float | int | tuple[int]): A positive number rescales the
            image by that factor; a 2-tuple is treated as a
            (long edge, short edge) bound and the image is scaled as
            large as possible while staying within it.
        return_scale (bool): Whether to also return the scaling factor
            that was applied.

    Returns:
        tuple[int]: The new rescaled image size, or
        ``(new_size, scale_factor)`` when ``return_scale`` is True.
    """
    width, height = old_size

    if isinstance(scale, (float, int)):
        if scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')
        factor = scale
    elif isinstance(scale, tuple):
        # Fit the image as large as possible inside the (long, short) bound.
        long_edge, short_edge = max(scale), min(scale)
        factor = min(long_edge / max(height, width),
                     short_edge / min(height, width))
    else:
        raise TypeError(
            f'Scale must be a number or tuple of int, but got {type(scale)}')

    new_size = _scale_size((width, height), factor)
    return (new_size, factor) if return_scale else new_size
|
|
|
|
|
def hsiresize(
    img: np.ndarray,
    size: Tuple[int, int],
    return_scale: bool = False,
) -> Union[Tuple[np.ndarray, int, int], np.ndarray]:
    """Resize an HSI image to a given size by integer pixel replication.

    Scale factors are computed as the truncated integer ratios
    ``w_scale = size[0] // w`` and ``h_scale = size[1] // h``; every pixel
    is then replicated ``h_scale`` times along the height axis and
    ``w_scale`` times along the width axis. The output size is therefore
    ``(h * h_scale, w * w_scale)``, which matches ``size`` exactly only
    when ``size`` is an integer multiple of the input size.

    Args:
        img (ndarray): The input image, shape (h, w) or (h, w, bands).
        size (tuple[int]): Target size (w, h).
        return_scale (bool): Whether to return ``w_scale`` and ``h_scale``
            in addition to the resized image.

    Returns:
        tuple | ndarray: ``(resized_img, w_scale, h_scale)`` or
        ``resized_img``.

    Raises:
        ValueError: If the target size is smaller than the input in either
            dimension, since the truncated scale factor would be 0 and the
            replication would silently produce an empty array.
    """
    h, w = img.shape[:2]

    w_scale = int(size[0] / w)
    h_scale = int(size[1] / h)
    if w_scale < 1 or h_scale < 1:
        raise ValueError(
            f'hsiresize only supports integer upscaling: target {size} is '
            f'smaller than the input ({w}, {h}) in at least one dimension.')
    # axis 0 of `img` is the height axis and axis 1 the width axis, so the
    # height factor must be applied on axis 0 and the width factor on
    # axis 1 (the previous implementation had them swapped, producing a
    # wrongly-shaped output whenever w_scale != h_scale).
    resized_img = np.repeat(np.repeat(img, h_scale, axis=0), w_scale, axis=1)
    if not return_scale:
        return resized_img
    else:
        return resized_img, w_scale, h_scale
|
|
|
|
|
@TRANSFORMS.register_module()
class HSIResize(BaseTransform):
    """Resize HSI images & bbox & seg & keypoints.

    This transform resizes the input image according to ``scale_factor``
    (``scale`` must be None; only factor-based resizing is supported).
    Bboxes, seg map and keypoints are then resized with the same scale
    factor. The image itself is resized by integer pixel replication via
    :func:`hsiresize`.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints
    - img_shape

    Added Keys:

    - scale
    - scale_factor
    - keep_ratio

    Args:
        scale (int or tuple): Images scales for resizing. Must be None for
            this transform; pass ``scale_factor`` instead. Defaults to None.
        scale_factor (float or tuple[float]): Scale factors for resizing.
            Defaults to None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Defaults to False.
        clip_object_border (bool): Whether to clip the objects
            outside the border of the image. In some dataset like MOT17, the gt
            bboxes are allowed to cross the border of images. Therefore, we
            don't need to clip the gt bboxes in these cases. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            Only used for the semantic segmentation map; the image itself is
            resized by pixel replication. Defaults to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 scale: Optional[Union[int, Tuple[int, int]]] = None,
                 scale_factor: Optional[Union[float, int,
                                              Tuple[float, float]]] = None,
                 keep_ratio: bool = False,
                 clip_object_border: bool = True,
                 backend: str = 'cv2',
                 interpolation: str = 'bilinear') -> None:
        assert scale is not None or scale_factor is not None, (
            '`scale` and'
            '`scale_factor` can not both be `None`')
        # This transform intentionally only supports factor-based resizing.
        assert scale is None, ('please input scale_factor instead of scale')

        if scale is None:
            self.scale = None
        else:
            if isinstance(scale, int):
                self.scale = (scale, scale)
            else:
                self.scale = scale
        self.backend = backend
        self.interpolation = interpolation
        self.keep_ratio = keep_ratio
        self.clip_object_border = clip_object_border
        if scale_factor is None:
            self.scale_factor = None
        elif isinstance(scale_factor, (int, float)):
            # Accept a plain number (including float, which the docstring
            # advertises but the previous check rejected) and expand it to
            # a (w_factor, h_factor) pair.
            self.scale_factor = (scale_factor, scale_factor)
        elif isinstance(scale_factor, tuple):
            assert (len(scale_factor)) == 2
            self.scale_factor = scale_factor
        else:
            raise TypeError(
                f'expect scale_factor is float or Tuple(float), but'
                f'get {type(scale_factor)}')

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``results['scale']``."""
        if results.get('img', None) is not None:
            if self.keep_ratio:
                h, w = results['img'].shape[:2]
                # `results['scale']` is interpreted as a max-size bound.
                new_size, scale_factor = rescale_size(
                    (w, h), results['scale'], return_scale=True)
                img, w_scale, h_scale = hsiresize(
                    results['img'], new_size, return_scale=True)
            else:
                img, w_scale, h_scale = hsiresize(
                    results['img'], results['scale'], return_scale=True)
            results['img'] = img
            results['img_shape'] = img.shape[:2]
            results['scale_factor'] = (w_scale, h_scale)
            results['keep_ratio'] = self.keep_ratio

    def _resize_masks(self, results: dict) -> None:
        """Resize masks with ``results['scale']``"""
        if results.get('gt_masks', None) is not None:
            if self.keep_ratio:
                results['gt_masks'] = results['gt_masks'].rescale(
                    results['scale'])
            else:
                results['gt_masks'] = results['gt_masks'].resize(
                    results['img_shape'])

    def _resize_bboxes(self, results: dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``."""
        if results.get('gt_bboxes', None) is not None:
            results['gt_bboxes'].rescale_(results['scale_factor'])
            if self.clip_object_border:
                results['gt_bboxes'].clip_(results['img_shape'])

    def _resize_seg(self, results: dict) -> None:
        """Resize semantic segmentation map with ``results['scale']``."""
        if results.get('gt_seg_map', None) is not None:
            if self.keep_ratio:
                gt_seg = mmcv.imrescale(
                    results['gt_seg_map'],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = mmcv.imresize(
                    results['gt_seg_map'],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results['gt_seg_map'] = gt_seg

    def _resize_keypoints(self, results: dict) -> None:
        """Resize keypoints with ``results['scale_factor']``."""
        if results.get('gt_keypoints', None) is not None:
            keypoints = results['gt_keypoints']

            keypoints[:, :, :2] = keypoints[:, :, :2] * np.array(
                results['scale_factor'])
            if self.clip_object_border:
                # img_shape is (h, w): clip x to width, y to height.
                keypoints[:, :, 0] = np.clip(keypoints[:, :, 0], 0,
                                             results['img_shape'][1])
                keypoints[:, :, 1] = np.clip(keypoints[:, :, 1], 0,
                                             results['img_shape'][0])
            results['gt_keypoints'] = keypoints

    def _record_homography_matrix(self, results: dict) -> None:
        """Record the homography matrix for the Resize."""
        w_scale, h_scale = results['scale_factor']
        homography_matrix = np.array(
            [[w_scale, 0, 0], [0, h_scale, 0], [0, 0, 1]], dtype=np.float32)
        if results.get('homography_matrix', None) is None:
            results['homography_matrix'] = homography_matrix
        else:
            # Compose with any transform recorded earlier in the pipeline.
            results['homography_matrix'] = homography_matrix @ results[
                'homography_matrix']

    @autocast_box_type()
    def transform(self, results: dict) -> dict:
        """Transform function to resize images, bounding boxes and semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.
        Returns:
            dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
            'scale', 'scale_factor', 'height', 'width', and 'keep_ratio' keys
            are updated in result dict.
        """
        if self.scale:
            results['scale'] = self.scale
        else:
            # Derive the absolute target size from the current image shape
            # ((h, w) reversed to (w, h)) and the configured factor.
            img_shape = results['img'].shape[:2]
            results['scale'] = _scale_size(img_shape[::-1], self.scale_factor)
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        self._record_homography_matrix(results)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(scale={self.scale}, '
        repr_str += f'scale_factor={self.scale_factor}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        # Previous version emitted stray ')' after the next two fields,
        # producing a malformed repr.
        repr_str += f'clip_object_border={self.clip_object_border}, '
        repr_str += f'backend={self.backend}, '
        repr_str += f'interpolation={self.interpolation})'
        return repr_str
|
|
|
|
|
@TRANSFORMS.register_module()
class ResizePiexlTarget(BaseTransform):
    """Resize image & bbox & masks & pixel-level targets by a fixed factor.

    This transform resizes the input image by ``scale_factor`` using
    integer pixel replication (:func:`hsiresize`), and rescales the
    bounding boxes, instance masks, and the pixel-level targets
    ``gt_seg`` and ``gt_abu`` with the same factor. The aspect ratio is
    always kept (``keep_ratio`` is fixed to True).

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_masks (optional)
    - gt_seg (optional)
    - gt_abu (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_masks
    - gt_seg
    - gt_abu
    - img_shape

    Added Keys:

    - scale
    - scale_factor
    - keep_ratio

    Args:
        scale_factor (float): Scale factor for resizing. Non-integer
            factors are truncated by :func:`hsiresize`. Defaults to 1.0.
        clip_object_border (bool): Whether to clip the objects
            outside the border of the image. In some dataset like MOT17, the gt
            bboxes are allowed to cross the border of images. Therefore, we
            don't need to clip the gt bboxes in these cases. Defaults to True.
    """

    def __init__(self,
                 scale_factor: float = 1.0,
                 clip_object_border: bool = True) -> None:
        self.clip_object_border = clip_object_border
        self.scale_factor = scale_factor
        # Replication-based resizing always preserves the aspect ratio.
        self.keep_ratio = True

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``self.scale_factor``."""
        h, w = results['img'].shape[:2]
        new_size = _scale_size((w, h), self.scale_factor)
        img, w_scale, h_scale = hsiresize(results['img'], new_size,
                                          return_scale=True)
        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['scale_factor'] = (w_scale, h_scale)
        results['keep_ratio'] = self.keep_ratio

    def _resize_bboxes(self, results: dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``."""
        if results.get('gt_bboxes', None) is not None:
            results['gt_bboxes'].rescale_(results['scale_factor'])
            if self.clip_object_border:
                results['gt_bboxes'].clip_(results['img_shape'])

    def _resize_masks(self, results: dict) -> None:
        """Resize masks with ``results['scale']``"""
        if results.get('gt_masks', None) is not None:
            # keep_ratio is always True for this transform, so the rescale
            # branch is the one actually taken.
            if self.keep_ratio:
                results['gt_masks'] = results['gt_masks'].rescale(
                    results['scale'])
            else:
                results['gt_masks'] = results['gt_masks'].resize(
                    results['img_shape'])

    def _resize_seg(self, results: dict) -> None:
        """Resize the segmentation target with ``self.scale_factor``."""
        if results.get('gt_seg', None) is not None:
            h, w = results['gt_seg'].shape[:2]
            new_size = _scale_size((w, h), self.scale_factor)
            gt_seg = hsiresize(results['gt_seg'], new_size,
                               return_scale=False)
            results['gt_seg'] = gt_seg

    def _resize_abu(self, results: dict) -> None:
        """Resize the abundance target with ``self.scale_factor``."""
        if results.get('gt_abu', None) is not None:
            h, w = results['gt_abu'].shape[:2]
            new_size = _scale_size((w, h), self.scale_factor)
            gt_abu = hsiresize(results['gt_abu'], new_size,
                               return_scale=False)
            results['gt_abu'] = gt_abu

    def _record_homography_matrix(self, results: dict) -> None:
        """Record the homography matrix for the Resize."""
        w_scale, h_scale = results['scale_factor']
        homography_matrix = np.array(
            [[w_scale, 0, 0], [0, h_scale, 0], [0, 0, 1]], dtype=np.float32)
        if results.get('homography_matrix', None) is None:
            results['homography_matrix'] = homography_matrix
        else:
            # Compose with any transform recorded earlier in the pipeline.
            results['homography_matrix'] = homography_matrix @ results[
                'homography_matrix']

    def transform(self, results: dict) -> dict:
        """Transform function to resize images, bounding boxes, masks and
        pixel-level targets.

        Args:
            results (dict): Result dict from loading pipeline.
        Returns:
            dict: Resized results, 'img', 'gt_bboxes', 'gt_masks',
            'gt_seg', 'gt_abu', 'scale', 'scale_factor', 'img_shape',
            and 'keep_ratio' keys are updated in result dict.
        """
        # Derive the absolute target size from the current image shape
        # ((h, w) reversed to (w, h)) and the configured factor.
        img_shape = results['img'].shape[:2]
        results['scale'] = _scale_size(img_shape[::-1],
                                       self.scale_factor)
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        self._resize_abu(results)
        self._record_homography_matrix(results)
        return results

    def __repr__(self):
        # Previous version omitted the surrounding parentheses, yielding
        # e.g. 'ResizePiexlTargetscale_factor=1.0, '.
        repr_str = self.__class__.__name__
        repr_str += f'(scale_factor={self.scale_factor})'
        return repr_str
|
|
|
|