|
|
|
from typing import Optional, Tuple, Union |
|
|
|
import mmcv |
|
import numpy as np |
|
import pycocotools.mask as maskUtils |
|
import torch |
|
from mmcv.transforms import BaseTransform |
|
from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations |
|
from mmcv.transforms import LoadImageFromFile |
|
from mmengine.fileio import get |
|
from mmengine.structures import BaseDataElement |
|
|
|
from mmdet.registry import TRANSFORMS |
|
from mmdet.structures.bbox import get_box_type |
|
from mmdet.structures.bbox.box_type import autocast_box_type |
|
from mmdet.structures.mask import BitmapMasks, PolygonMasks |
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadAnnotations(MMCV_LoadAnnotations):
    """Load and process the ``instances`` and ``seg_map`` annotation provided
    by dataset.

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Class label of the bounding box.
                'bbox_label': 1,

                # Used in instance/panoptic segmentation. The segmentation
                # mask of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                # one for each connected component of the object. Each
                # list[float] is one simple polygon in the format of
                # [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are absolute
                # coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask
                # in COCO's RLE format. The dict should have keys
                # "size" and "counts". Can be loaded by pycocotools.
                'mask': list[list[float]] or dict,

                }
            ]
            # Filename of semantic or panoptic segmentation ground truth
            # file.
            'seg_map_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of
            # bboxes in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In uint8 type.
            'gt_seg_map': np.ndarray (H, W)
        }

    Required Keys:

    - height
    - width
    - instances

      - bbox (optional)
      - bbox_label
      - mask (optional)
      - ignore_flag

    - seg_map_path (optional)

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32])
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg_map (np.uint8)
    - gt_ignore_flags (bool)

    Args:
        with_bbox (bool): Whether to parse and load the bbox annotation.
            Forwarded to the mmcv base class. Defaults to True.
        with_label (bool): Whether to parse and load the label annotation.
            Forwarded to the mmcv base class. Defaults to True.
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to False.
        with_seg (bool): Whether to parse and load the semantic segmentation
            annotation. Forwarded to the mmcv base class. Defaults to False.
        poly2mask (bool): Whether to convert mask to bitmap.
            Defaults to True.
        box_type (str, optional): The box type used to wrap the bboxes. If
            ``box_type`` is None, gt_bboxes will keep being np.ndarray.
            Defaults to 'hbox'.
        imdecode_backend (str): The image decoding backend type. The backend
            argument for :func:`mmcv.imfrombytes`.
            See :func:`mmcv.imfrombytes` for details.
            Forwarded to the mmcv base class. Defaults to 'cv2'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Forwarded to the mmcv base class.
            Defaults to None.
    """

    def __init__(self,
                 with_mask: bool = False,
                 poly2mask: bool = True,
                 box_type: Optional[str] = 'hbox',
                 **kwargs) -> None:
        # Everything not handled here (with_bbox, with_label, with_seg,
        # imdecode_backend, backend_args, ...) is consumed by the base class.
        super().__init__(**kwargs)
        self.with_mask = with_mask
        self.poly2mask = poly2mask
        self.box_type = box_type

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Writes ``gt_bboxes`` and ``gt_ignore_flags`` into ``results`` in
        place.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.
        """
        instances = results.get('instances', [])
        gt_bboxes = [instance['bbox'] for instance in instances]
        gt_ignore_flags = [instance['ignore_flag'] for instance in instances]

        if self.box_type is None:
            # Keep plain arrays; the reshape guarantees an (N, 4) layout even
            # when the image has no instance at all.
            results['gt_bboxes'] = np.array(
                gt_bboxes, dtype=np.float32).reshape((-1, 4))
        else:
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(
                gt_bboxes, dtype=torch.float32)
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)

    def _load_labels(self, results: dict) -> None:
        """Private function to load label annotations.

        Writes ``gt_bboxes_labels`` into ``results`` in place.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.
        """
        gt_bboxes_labels = [
            instance['bbox_label']
            for instance in results.get('instances', [])
        ]
        results['gt_bboxes_labels'] = np.array(
            gt_bboxes_labels, dtype=np.int64)

    def _poly2mask(self, mask_ann: Union[list, dict], img_h: int,
                   img_w: int) -> np.ndarray:
        """Private function to convert masks represented with polygon to
        bitmaps.

        Args:
            mask_ann (list | dict): Polygon mask annotation input.
            img_h (int): The height of output mask.
            img_w (int): The width of output mask.

        Returns:
            np.ndarray: The decode bitmap mask of shape (img_h, img_w).
        """
        if isinstance(mask_ann, list):
            # Polygons: one object may consist of several parts, so encode
            # every part as RLE and merge them into a single RLE.
            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
            rle = maskUtils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # Uncompressed RLE: ``counts`` is a plain list of run lengths.
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # Already a compressed RLE, decode it directly.
            rle = mask_ann
        mask = maskUtils.decode(rle)
        return mask

    @staticmethod
    def _is_valid_rle(mask_ann: dict) -> bool:
        """Check that a dict mask annotation looks like a COCO RLE.

        A valid RLE has a non-None ``size`` and a ``counts`` that is either
        a list (uncompressed) or a ``str``/``bytes`` (compressed; ``bytes``
        is what pycocotools itself produces when encoding).
        """
        if mask_ann.get('size') is None:
            return False
        return isinstance(mask_ann.get('counts'), (list, str, bytes))

    def _process_masks(self, results: dict) -> list:
        """Process gt_masks and filter invalid polygons.

        Instances whose mask cannot be used are flagged through
        ``ignore_flag`` (the instance dict is modified in place) and get a
        dummy polygon so that the number of masks stays aligned with the
        number of instances. ``results['gt_ignore_flags']`` is rewritten
        from the updated flags.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.

        Returns:
            list: Processed gt_masks.
        """
        gt_masks = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_mask = instance['mask']
            if isinstance(gt_mask, list):
                # Polygon format: a usable polygon has an even number of
                # coordinates and at least 3 points (6 values).
                gt_mask = [
                    np.array(polygon) for polygon in gt_mask
                    if len(polygon) % 2 == 0 and len(polygon) >= 6
                ]
                if not gt_mask:
                    # No usable polygon left: ignore this instance and keep
                    # a placeholder polygon.
                    instance['ignore_flag'] = 1
                    gt_mask = [np.zeros(6)]
            elif not self.poly2mask:
                # Non-polygon masks cannot be represented as PolygonMasks,
                # so ignore the instance and keep a placeholder polygon.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            elif isinstance(gt_mask, dict) and \
                    not self._is_valid_rle(gt_mask):
                # Malformed RLE dict: ignore the instance and keep a
                # placeholder polygon.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            gt_masks.append(gt_mask)
            # Read the flag after the checks above so that updates made in
            # this loop are reflected.
            gt_ignore_flags.append(instance['ignore_flag'])
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)
        return gt_masks

    def _load_masks(self, results: dict) -> None:
        """Private function to load mask annotations.

        Writes ``gt_masks`` into ``results`` in place, as ``BitmapMasks``
        when ``poly2mask`` is set and as ``PolygonMasks`` otherwise.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.
        """
        h, w = results['ori_shape']
        gt_masks = self._process_masks(results)
        if self.poly2mask:
            gt_masks = BitmapMasks(
                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
        else:
            # Hand over a fresh list of the (already filtered) polygons.
            gt_masks = PolygonMasks(list(gt_masks), h, w)
        results['gt_masks'] = gt_masks

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Args:
            results (dict): Result dict from :obj:`mmengine.BaseDataset`.

        Returns:
            dict: The dict contains loaded bounding box, label and
            semantic segmentation.
        """
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask:
            self._load_masks(results)
        if self.with_seg:
            self._load_seg_map(results)
        return results

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}('
                f'with_bbox={self.with_bbox}, '
                f'with_label={self.with_label}, '
                f'with_mask={self.with_mask}, '
                f'with_seg={self.with_seg}, '
                f'poly2mask={self.poly2mask}, '
                f"imdecode_backend='{self.imdecode_backend}', "
                f'backend_args={self.backend_args})')
|
|
|
|
|
|