|
from mmcv.transforms import to_tensor |
|
from mmengine.structures import InstanceData, PixelData |
|
from mmdet.structures import DetDataSample |
|
from mmdet.structures.bbox import BaseBoxes |
|
import mmengine.fileio as fileio |
|
from typing import Optional, Tuple, Union |
|
import mmcv |
|
import numpy as np |
|
import pycocotools.mask as maskUtils |
|
import torch |
|
from mmcv.transforms import BaseTransform |
|
from mmcv.transforms import LoadAnnotations as MMCV_LoadAnnotations |
|
from mmdet.registry import TRANSFORMS |
|
from mmdet.structures.bbox import get_box_type |
|
from mmdet.structures.mask import BitmapMasks, PolygonMasks |
|
import scipy.io as sio |
|
|
|
def hsifromfile(img_path, backend='npy') -> np.ndarray:
    """Load a hyperspectral image array from a file on disk.

    Args:
        img_path (str): Path of the file to load. For the ``'npy'`` backend
            it is passed unchanged to :func:`numpy.load`.
        backend (str | None): Name of the decoding backend. Only ``'npy'``
            is implemented; ``None`` selects that default backend.
            Defaults to ``'npy'``.

    Returns:
        ndarray: Loaded image array, exactly as returned by
        :func:`numpy.load`.

    Raises:
        ValueError: If ``backend`` names an unsupported backend.
    """
    if backend is None:
        backend = 'npy'
    if backend != 'npy':
        # Fail loudly instead of falling through with no image loaded.
        raise ValueError(
            f'Unsupported backend {backend!r}; only \'npy\' is supported.')
    return np.load(img_path)
|
|
|
@TRANSFORMS.register_module()
class LoadHyperspectralImageFromFiles(BaseTransform):
    """Load a hyperspectral image with :func:`hsifromfile` and normalize it.

    The array read from ``results['img_path']`` is divided by
    ``normalized_basis``, or by the constant 500 when no basis is given.

    Required Keys:

    - img_path

    Modified Keys:

    - img
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to cast the normalized image to float32.
            If False, the image keeps whatever dtype the division produces.
            Defaults to False.
        normalized_basis (float | Sequence[float] | np.ndarray | None):
            Divisor applied to the loaded image. It is wrapped in
            ``np.array`` and divided into the image using NumPy
            broadcasting. If None, the image is divided by 500.
            Defaults to None.
    """

    def __init__(
        self,
        to_float32: bool = False,
        normalized_basis=None,
    ) -> None:
        self.to_float32 = to_float32
        self.normalized_basis = normalized_basis

    def transform(self, results: dict) -> dict:
        """Load the image, normalize it and record its shape.

        Args:
            results (dict): Result dict from the dataset; must contain
                ``img_path``.

        Returns:
            dict: The same dict with ``img``, ``img_shape`` and
            ``ori_shape`` set. Both shape entries hold the first two
            dimensions of the loaded array.
        """
        img = hsifromfile(results['img_path'])

        # Identity check: ``== None`` on an ndarray basis compares
        # element-wise and makes the ``if`` raise.
        if self.normalized_basis is None:
            img = img / 500
        else:
            img = img / np.array(self.normalized_basis)
        if self.to_float32:
            img = img.astype(np.float32)

        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['ori_shape'] = img.shape[:2]
        return results

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}('
                f'to_float32={self.to_float32}, '
                f'normalized_basis={self.normalized_basis})')
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadAnnotationsPiexlTarget(MMCV_LoadAnnotations):
    """Load instance annotations plus optional pixel-level target maps.

    Besides boxes, labels and instance masks, this transform can load a
    per-pixel segmentation image (``with_seg``) and an abundance map stored
    in a MATLAB ``.mat`` file (``with_abu``).

    The annotation format is as the following:

    .. code-block:: python

        {
            'instances':
            [
                {
                # List of 4 numbers representing the bounding box of the
                # instance, in (x1, y1, x2, y2) order.
                'bbox': [x1, y1, x2, y2],

                # Label of image classification.
                'bbox_label': 1,

                # Used in instance/panoptic segmentation. The segmentation
                # mask of the instance or the information of segments.
                # 1. If list[list[float]], it represents a list of polygons,
                #    one for each connected component of the object. Each
                #    list[float] is one simple polygon in the format of
                #    [x1, y1, ..., xn, yn] (n >= 3). The Xs and Ys are
                #    absolute coordinates in unit of pixels.
                # 2. If dict, it represents the per-pixel segmentation mask
                #    in COCO's compressed RLE format. The dict should have
                #    keys "size" and "counts". Can be loaded by pycocotools
                'mask': list[list[float]] or dict,

                # Whether the instance should be ignored.
                'ignore_flag': 0,
                }
            ]
            # Filename of the pixel-level segmentation image.
            'seg_path': 'a/b/c'
            # Filename of the ``.mat`` file holding the abundance map under
            # the variable name ``data``.
            'abu_path': 'a/b/c'
        }

    After this module, the annotation has been changed to the format below:

    .. code-block:: python

        {
            # In (x1, y1, x2, y2) order, float type. N is the number of
            # bboxes in an image
            'gt_bboxes': BaseBoxes(N, 4)
            # In int type.
            'gt_bboxes_labels': np.ndarray(N, )
            # In built-in class
            'gt_masks': PolygonMasks (H, W) or BitmapMasks (H, W)
            # In float32 type.
            'gt_seg': np.ndarray
            # In float32 type.
            'gt_abu': np.ndarray
        }

    Required Keys:

    - ori_shape (only when ``with_mask`` is True)
    - instances

      - bbox (optional)
      - bbox_label
      - mask (optional)
      - ignore_flag

    - seg_path (only when ``with_seg`` is True)
    - abu_path (only when ``with_abu`` is True)

    Added Keys:

    - gt_bboxes (BaseBoxes[torch.float32] or np.float32 array)
    - gt_bboxes_labels (np.int64)
    - gt_masks (BitmapMasks | PolygonMasks)
    - gt_seg (np.float32)
    - gt_abu (np.float32)
    - gt_ignore_flags (bool)

    Args:
        with_mask (bool): Whether to parse and load the mask annotation.
            Defaults to False.
        with_seg (bool): Whether to load the segmentation image referenced
            by ``seg_path``. Defaults to False.
        with_abu (bool): Whether to load the abundance map referenced by
            ``abu_path``. Defaults to False.
        poly2mask (bool): Whether to convert mask to bitmap.
            Defaults to True.
        box_type (str): The box type used to wrap the bboxes. If
            ``box_type`` is None, gt_bboxes will keep being np.ndarray.
            Defaults to 'hbox'.
        **kwargs: Remaining options (such as ``with_bbox``, ``with_label``,
            ``imdecode_backend`` and ``backend_args``) are forwarded to the
            parent class, which stores them on the instance.
    """

    def __init__(self,
                 with_mask: bool = False,
                 with_seg: bool = False,
                 with_abu: bool = False,
                 poly2mask: bool = True,
                 box_type: str = 'hbox',
                 **kwargs) -> None:
        super().__init__(**kwargs)
        # Assigned after the parent initializer so these values win over
        # anything the parent may have set under the same attribute names.
        self.with_mask = with_mask
        self.poly2mask = poly2mask
        self.box_type = box_type
        self.with_seg = with_seg
        self.with_abu = with_abu

    def _load_bboxes(self, results: dict) -> None:
        """Private function to load bounding box annotations.

        Writes ``gt_bboxes`` and ``gt_ignore_flags`` into ``results``.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        instances = results.get('instances', [])
        gt_bboxes = [instance['bbox'] for instance in instances]
        gt_ignore_flags = [instance['ignore_flag'] for instance in instances]

        if self.box_type is None:
            # Keep plain arrays; the reshape keeps the (N, 4) layout even
            # when there are no instances.
            results['gt_bboxes'] = np.array(
                gt_bboxes, dtype=np.float32).reshape((-1, 4))
        else:
            _, box_type_cls = get_box_type(self.box_type)
            results['gt_bboxes'] = box_type_cls(gt_bboxes, dtype=torch.float32)
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)

    def _load_labels(self, results: dict) -> None:
        """Private function to load label annotations.

        Writes ``gt_bboxes_labels`` (int64 array) into ``results``.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
        """
        gt_bboxes_labels = [
            instance['bbox_label']
            for instance in results.get('instances', [])
        ]
        results['gt_bboxes_labels'] = np.array(
            gt_bboxes_labels, dtype=np.int64)

    def _poly2mask(self, mask_ann: Union[list, dict], img_h: int,
                   img_w: int) -> np.ndarray:
        """Private function to convert masks represented with polygon to
        bitmaps.

        Args:
            mask_ann (list | dict): Polygon mask annotation input.
            img_h (int): The height of output mask.
            img_w (int): The width of output mask.

        Returns:
            np.ndarray: The decoded bitmap mask, as returned by
            ``pycocotools.mask.decode``.
        """
        if isinstance(mask_ann, list):
            # Polygons: an object may consist of several parts, which are
            # merged into a single RLE before decoding.
            rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)
            rle = maskUtils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # Uncompressed RLE.
            rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # Already a compressed RLE.
            rle = mask_ann
        return maskUtils.decode(rle)

    def _process_masks(self, results: dict) -> list:
        """Process gt_masks and filter invalid polygons.

        Instances whose mask annotation is invalid are flagged as ignored
        (their ``ignore_flag`` is set to 1 in place) and receive a
        placeholder polygon. ``results['gt_ignore_flags']`` is rebuilt from
        the updated flags.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            list: Processed gt_masks.
        """
        gt_masks = []
        gt_ignore_flags = []
        for instance in results.get('instances', []):
            gt_mask = instance['mask']
            if isinstance(gt_mask, list):
                # Keep only polygons with an even number of coordinates
                # and at least three points.
                gt_mask = [
                    np.array(polygon) for polygon in gt_mask
                    if len(polygon) % 2 == 0 and len(polygon) >= 6
                ]
                if len(gt_mask) == 0:
                    # No valid polygon left: ignore the instance and use
                    # a fake mask.
                    instance['ignore_flag'] = 1
                    gt_mask = [np.zeros(6)]
            elif not self.poly2mask:
                # Without bitmap conversion only polygon lists are
                # accepted; every other format is ignored.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            elif isinstance(gt_mask, dict) and \
                    not (gt_mask.get('counts') is not None and
                         gt_mask.get('size') is not None and
                         isinstance(gt_mask['counts'], (list, str))):
                # An RLE dict must provide both ``counts`` and ``size``,
                # with ``counts`` being a list or a str.
                instance['ignore_flag'] = 1
                gt_mask = [np.zeros(6)]
            gt_masks.append(gt_mask)
            # Re-read the flag because it may have been changed above.
            gt_ignore_flags.append(instance['ignore_flag'])
        results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool)
        return gt_masks

    def _load_masks(self, results: dict) -> None:
        """Private function to load mask annotations.

        Writes ``gt_masks`` into ``results`` as ``BitmapMasks`` when
        ``poly2mask`` is True, otherwise as ``PolygonMasks``.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.
                Must contain ``ori_shape`` as ``(h, w)``.
        """
        h, w = results['ori_shape']
        gt_masks = self._process_masks(results)
        if self.poly2mask:
            gt_masks = BitmapMasks(
                [self._poly2mask(mask, h, w) for mask in gt_masks], h, w)
        else:
            gt_masks = PolygonMasks(list(gt_masks), h, w)
        results['gt_masks'] = gt_masks

    def _load_seg_map(self, results: dict) -> None:
        """Private function to load the pixel-level segmentation image.

        The file at ``results['seg_path']`` is decoded as a grayscale image
        with the ``pillow`` backend and stored as float32 in
        ``results['gt_seg']``.

        Args:
            results (dict): Result dict from
                :class:`mmengine.dataset.BaseDataset`.
        """
        assert results['seg_path'] is not None
        # Honour the configured storage backend, as the other loaders in
        # this file do, instead of always using the default one.
        img_bytes = fileio.get(
            results['seg_path'], backend_args=self.backend_args)
        img = mmcv.imfrombytes(
            img_bytes, flag='grayscale', backend='pillow')
        results['gt_seg'] = img.astype('float32')

    def _load_abu_map(self, results: dict) -> None:
        """Private function to load the abundance map.

        The ``data`` variable of the MATLAB file at ``results['abu_path']``
        is read with ``scipy.io.loadmat`` and stored as float32 in
        ``results['gt_abu']``.

        Args:
            results (dict): Result dict from
                :class:`mmengine.dataset.BaseDataset`.
        """
        assert results['abu_path'] is not None
        img = sio.loadmat(results['abu_path'])['data']
        results['gt_abu'] = img.astype('float32')

    def transform(self, results: dict) -> dict:
        """Function to load multiple types annotations.

        Args:
            results (dict): Result dict from :obj:``mmengine.BaseDataset``.

        Returns:
            dict: The same dict, extended with every annotation type that
            is enabled on this transform.
        """
        if self.with_bbox:
            self._load_bboxes(results)
        if self.with_label:
            self._load_labels(results)
        if self.with_mask:
            self._load_masks(results)
        if self.with_seg:
            self._load_seg_map(results)
        if self.with_abu:
            self._load_abu_map(results)

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(with_bbox={self.with_bbox}, '
        repr_str += f'with_label={self.with_label}, '
        repr_str += f'with_mask={self.with_mask}, '
        repr_str += f'with_seg={self.with_seg}, '
        repr_str += f'with_abu={self.with_abu}, '
        repr_str += f'poly2mask={self.poly2mask}, '
        repr_str += f"imdecode_backend='{self.imdecode_backend}', "
        repr_str += f'backend_args={self.backend_args})'
        return repr_str
|
|
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class LoadHyperspectralMaskImageFromFiles(BaseTransform):
    """Load a hyperspectral image and multiply it by a mask image.

    The image is read from ``results['img_path'] + '_rd.npy'`` and
    normalized; the mask is read from
    ``results['mask_path'] + '_mask.png'``. Mask pixels equal to 255 are
    set to 1, the mask is repeated 17 times along axis 2, and the result is
    multiplied element-wise with the image.

    Required Keys:

    - img_path
    - mask_path

    Modified Keys:

    - img
    - img_shape
    - ori_shape

    Args:
        to_float32 (bool): Whether to cast both the normalized image and
            the mask to float32 before they are multiplied.
            Defaults to False.
        normalized_basis (float | Sequence[float] | np.ndarray | None):
            Divisor applied to the loaded image. It is wrapped in
            ``np.array`` and divided into the image using NumPy
            broadcasting. If None, the image is divided by 1000.
            Defaults to None.
        color_type (str): ``flag`` argument passed to
            :func:`mmcv.imfrombytes` when decoding the mask.
            Defaults to 'color'.
        imdecode_backend (str): ``backend`` argument passed to
            :func:`mmcv.imfrombytes` when decoding the mask.
            Defaults to 'cv2'.
        backend_args (dict, optional): Arguments passed to
            :func:`mmengine.fileio.get` when reading the mask file. A copy
            of the dict is stored. Defaults to None.
    """

    def __init__(
        self,
        to_float32: bool = False,
        normalized_basis=None,
        color_type: str = 'color',
        imdecode_backend: str = 'cv2',
        backend_args: Optional[dict] = None
    ) -> None:
        self.to_float32 = to_float32
        self.normalized_basis = normalized_basis
        self.color_type = color_type
        self.imdecode_backend = imdecode_backend
        self.backend_args: Optional[dict] = None
        if backend_args is not None:
            # Copy so later changes to the caller's dict do not leak in.
            self.backend_args = backend_args.copy()

    def transform(self, results: dict) -> dict:
        """Load the image and its mask, then store the masked image.

        Args:
            results (dict): Result dict from the dataset; must contain
                ``img_path`` and ``mask_path`` (both without the file
                suffixes appended here).

        Returns:
            dict: The same dict with ``img``, ``img_shape`` and
            ``ori_shape`` set. Both shape entries hold the first two
            dimensions of the masked image.
        """
        img = hsifromfile(results['img_path']+'_rd.npy')

        # Identity check: ``== None`` on an ndarray basis compares
        # element-wise and makes the ``if`` raise.
        if self.normalized_basis is None:
            img = img / 1000
        else:
            img = img / np.array(self.normalized_basis)
        if self.to_float32:
            img = img.astype(np.float32)

        maskname = results['mask_path']+'_mask.png'
        mask_bytes = fileio.get(
            maskname, backend_args=self.backend_args)
        mask = mmcv.imfrombytes(
            mask_bytes, flag=self.color_type, backend=self.imdecode_backend)
        if self.to_float32:
            mask = mask.astype(np.float32)

        # Turn a 0/255 mask into a 0/1 multiplier.
        mask[mask == 255] = 1
        # NOTE(review): the repeat factor 17 is hard-coded; presumably it
        # makes the repeated mask match the image's channel count for the
        # target dataset -- confirm before reusing with other data.
        mask = np.repeat(mask, 17, axis=2)
        img = img * mask

        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['ori_shape'] = img.shape[:2]
        return results

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}('
                f'to_float32={self.to_float32}, '
                f'normalized_basis={self.normalized_basis}, '
                f"color_type='{self.color_type}', "
                f"imdecode_backend='{self.imdecode_backend}', "
                f'backend_args={self.backend_args})')
|
|
|
|
|
def to_tensor_HSI(
        data: Union[torch.Tensor, np.ndarray]) -> torch.Tensor:
    """Convert a tensor or a numpy array to :obj:`torch.Tensor`.

    Supported types are :class:`numpy.ndarray` and :class:`torch.Tensor`
    only; any other type raises :class:`TypeError`.

    Args:
        data (torch.Tensor | numpy.ndarray): Data to be converted.

    Returns:
        torch.Tensor: the converted data. Tensors are returned unchanged.
        Arrays of dtype ``'>i2'`` are cast to float32 first. Other arrays
        with a non-native byte order are copied to the native byte order
        with the same base type; all remaining arrays are wrapped directly
        with :func:`torch.from_numpy`.

    Raises:
        TypeError: If ``data`` is neither a tensor nor a numpy array.
    """
    if isinstance(data, torch.Tensor):
        return data
    if isinstance(data, np.ndarray):
        if data.dtype == '>i2':
            return torch.from_numpy(data.astype(np.float32))
        if not data.dtype.isnative:
            # torch.from_numpy rejects non-native byte order, so convert
            # to the native order while keeping the base type.
            data = data.astype(data.dtype.newbyteorder('='))
        return torch.from_numpy(data)
    raise TypeError(f'type {type(data)} cannot be converted to tensor.')
|
|
|
@TRANSFORMS.register_module()
class PackDetInputs_HSI(BaseTransform):
    """Pack the inputs data for the detection / semantic segmentation /
    panoptic segmentation.

    The image is converted with :func:`to_tensor_HSI` and moved to
    channel-first layout; annotations are collected into a
    ``DetDataSample``.

    The ``img_meta`` item is always populated. The contents of the
    ``img_meta`` dictionary depends on ``meta_keys``. By default this
    includes:

        - ``img_id``: id of the image

        - ``img_path``: path to the image file

        - ``ori_shape``: original shape of the image as a tuple (h, w)

        - ``img_shape``: shape of the image input to the network as a tuple
          (h, w). Note that images may be zero padded on the
          bottom/right if the batch tensor is larger than this shape.

        - ``scale_factor``: a float indicating the preprocessing scale

        - ``flip``: a boolean indicating if image flip transform was used

        - ``flip_direction``: the flipping direction

    Args:
        meta_keys (Sequence[str], optional): Keys copied from ``results``
            into the meta information of the data sample. Every listed key
            must be present in ``results``.
            Default: ``('img_id', 'img_path', 'ori_shape', 'img_shape',
            'scale_factor', 'flip', 'flip_direction')``
    """
    # Maps keys of ``results`` to field names of the packed instance data.
    mapping_table = {
        'gt_bboxes': 'bboxes',
        'gt_bboxes_labels': 'labels',
        'gt_masks': 'masks'
    }

    def __init__(self,
                 meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                            'scale_factor', 'flip', 'flip_direction')):
        self.meta_keys = meta_keys

    def transform(self, results: dict) -> dict:
        """Method to pack the input data.

        Args:
            results (dict): Result dict from the data pipeline.

        Returns:
            dict:

            - 'inputs' (obj:`torch.Tensor`): The forward data of models.
              Only present when ``results`` contains ``img``.
            - 'data_samples' (obj:`DetDataSample`): The annotation info of
              the sample.
        """
        packed_results = dict()
        if 'img' in results:
            img = results['img']
            if len(img.shape) < 3:
                # Give single-channel images an explicit channel axis.
                img = np.expand_dims(img, -1)
            # Both branches produce a contiguous channel-first tensor; they
            # differ only in whether the reordering happens in numpy or in
            # torch. ``to_tensor_HSI`` is used so that arrays of dtype
            # '>i2' are accepted as well.
            if not img.flags.c_contiguous:
                img = np.ascontiguousarray(img.transpose(2, 0, 1))
                img = to_tensor_HSI(img)
            else:
                img = to_tensor_HSI(img).permute(2, 0, 1).contiguous()

            packed_results['inputs'] = img

        has_ignore_flags = 'gt_ignore_flags' in results
        if has_ignore_flags:
            valid_idx = np.where(results['gt_ignore_flags'] == 0)[0]
            ignore_idx = np.where(results['gt_ignore_flags'] == 1)[0]

        data_sample = DetDataSample()
        instance_data = InstanceData()
        ignore_instance_data = InstanceData()

        for key, packed_key in self.mapping_table.items():
            if key not in results:
                continue
            value = results[key]
            if key == 'gt_masks' or isinstance(value, BaseBoxes):
                # Mask and box containers are stored as they are, only
                # split into valid and ignored instances.
                if has_ignore_flags:
                    instance_data[packed_key] = value[valid_idx]
                    ignore_instance_data[packed_key] = value[ignore_idx]
                else:
                    instance_data[packed_key] = value
            else:
                if has_ignore_flags:
                    instance_data[packed_key] = to_tensor(value[valid_idx])
                    ignore_instance_data[packed_key] = to_tensor(
                        value[ignore_idx])
                else:
                    instance_data[packed_key] = to_tensor(value)
        data_sample.gt_instances = instance_data
        data_sample.ignored_instances = ignore_instance_data

        if 'proposals' in results:
            proposals = InstanceData(
                bboxes=to_tensor(results['proposals']),
                scores=to_tensor(results['proposals_scores']))
            data_sample.proposals = proposals

        if 'gt_seg_map' in results:
            # Add a leading axis to the segmentation map before packing.
            gt_sem_seg_data = dict(
                sem_seg=to_tensor(results['gt_seg_map'][None, ...].copy()))
            data_sample.gt_sem_seg = PixelData(**gt_sem_seg_data)

        img_meta = {}
        for key in self.meta_keys:
            assert key in results, f'`{key}` is not found in `results`, ' \
                f'the valid keys are {list(results)}.'
            img_meta[key] = results[key]

        data_sample.set_metainfo(img_meta)
        packed_results['data_samples'] = data_sample

        return packed_results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(meta_keys={self.meta_keys})'
        return repr_str
|
|
|
|
|
|