"""Wrappers and conversions for third party pycocotools. |
|
|
|
This is derived from code in the Tensorflow Object Detection API: |
|
https://github.com/tensorflow/models/tree/master/research/object_detection |
|
|
|
Huang et. al. "Speed/accuracy trade-offs for modern convolutional object |
|
detectors" CVPR 2017. |
|
""" |

from typing import Any, Collection, Dict, List, Optional, Union

import numpy as np
from pycocotools import mask


# Names and positions of the standard COCO metrics as they appear in the
# 12-element `stats` vector produced by pycocotools' COCOeval.
COCO_METRIC_NAMES_AND_INDEX = (
    ('Precision/mAP', 0),
    ('Precision/mAP@.50IOU', 1),
    ('Precision/mAP@.75IOU', 2),
    ('Precision/mAP (small)', 3),
    ('Precision/mAP (medium)', 4),
    ('Precision/mAP (large)', 5),
    ('Recall/AR@1', 6),
    ('Recall/AR@10', 7),
    ('Recall/AR@100', 8),
    ('Recall/AR@100 (small)', 9),
    ('Recall/AR@100 (medium)', 10),
    ('Recall/AR@100 (large)', 11)
)


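# A small helper sketch (not part of the original API; `_StatsToMetricsDict` is
# a name introduced here purely for illustration): after COCOeval's summarize()
# step, its `stats` attribute is a length-12 vector whose entries follow the
# order of COCO_METRIC_NAMES_AND_INDEX, so the constant can be used to attach
# names to the raw values.
def _StatsToMetricsDict(stats: np.ndarray) -> Dict[str, float]:
  """Maps a COCOeval.stats vector to a {metric_name: value} dict (illustrative)."""
  return {name: float(stats[index])
          for name, index in COCO_METRIC_NAMES_AND_INDEX}

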
def _ConvertBoxToCOCOFormat(box: np.ndarray) -> List[float]:
  """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format.

  This is a utility function for converting from our internal
  [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API,
  i.e., [xmin, ymin, width, height].

  Args:
    box: a [ymin, xmin, ymax, xmax] numpy array

  Returns:
    a list of floats representing [xmin, ymin, width, height]
  """
  return [float(box[1]), float(box[0]), float(box[3] - box[1]),
          float(box[2] - box[0])]


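# Worked example for _ConvertBoxToCOCOFormat (the values are illustrative only):
# a box with ymin=10, xmin=20, ymax=50, xmax=60 maps to
# [xmin, ymin, width, height]:
#   _ConvertBoxToCOCOFormat(np.array([10., 20., 50., 60.]))
#   # -> [20.0, 10.0, 40.0, 40.0]

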
def ExportSingleImageGroundtruthToCoco(
    image_id: Union[int, str],
    next_annotation_id: int,
    category_id_set: Collection[int],
    groundtruth_boxes: np.ndarray,
    groundtruth_classes: np.ndarray,
    groundtruth_masks: np.ndarray,
    groundtruth_is_crowd: Optional[np.ndarray] = None) -> List[Dict[str, Any]]:
"""Exports groundtruth of a single image to COCO format. |
|
|
|
This function converts groundtruth detection annotations represented as numpy |
|
arrays to dictionaries that can be ingested by the COCO evaluation API. Note |
|
that the image_ids provided here must match the ones given to |
|
ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in |
|
correspondence - that is: groundtruth_boxes[i, :], and |
|
groundtruth_classes[i] are associated with the same groundtruth annotation. |
|
|
|
In the exported result, "area" fields are always set to the foregorund area of |
|
the mask. |
|
|
|
Args: |
|
image_id: a unique image identifier either of type integer or string. |
|
next_annotation_id: integer specifying the first id to use for the |
|
groundtruth annotations. All annotations are assigned a continuous integer |
|
id starting from this value. |
|
category_id_set: A set of valid class ids. Groundtruth with classes not in |
|
category_id_set are dropped. |
|
groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] |
|
groundtruth_classes: numpy array (int) with shape [num_gt_boxes] |
|
groundtruth_masks: uint8 numpy array of shape [num_detections, image_height, |
|
image_width] containing detection_masks. |
|
groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] |
|
indicating whether groundtruth boxes are crowd. |
|
|
|
Returns: |
|
a list of groundtruth annotations for a single image in the COCO format. |
|
|
|
Raises: |
|
ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the |
|
right lengths or (2) if each of the elements inside these lists do not |
|
have the correct shapes or (3) if image_ids are not integers |
|
""" |
|
|
|
  if len(groundtruth_classes.shape) != 1:
    raise ValueError('groundtruth_classes is expected to be of rank 1.')
  if len(groundtruth_boxes.shape) != 2:
    raise ValueError('groundtruth_boxes is expected to be of rank 2.')
  if groundtruth_boxes.shape[1] != 4:
    raise ValueError('groundtruth_boxes should have shape[1] == 4.')
  num_boxes = groundtruth_classes.shape[0]
  if num_boxes != groundtruth_boxes.shape[0]:
    raise ValueError('Corresponding entries in groundtruth_classes and '
                     'groundtruth_boxes should have compatible shapes (i.e., '
                     'agree on the 0th dimension). '
                     'Classes shape: %d. Boxes shape: %d. Image ID: %s' % (
                         groundtruth_classes.shape[0],
                         groundtruth_boxes.shape[0], image_id))
  has_is_crowd = groundtruth_is_crowd is not None
  if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
    raise ValueError('groundtruth_is_crowd is expected to be of rank 1.')
  groundtruth_list = []
  for i in range(num_boxes):
    if groundtruth_classes[i] in category_id_set:
      iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
      segment = mask.encode(np.asfortranarray(groundtruth_masks[i]))
      # The mask area is cast to a plain float so the annotation stays
      # JSON-serializable.
      area = float(mask.area(segment))
      export_dict = {
          'id': next_annotation_id + i,
          'image_id': image_id,
          'category_id': int(groundtruth_classes[i]),
          'bbox': list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
          'segmentation': segment,
          'area': area,
          'iscrowd': iscrowd
      }
      groundtruth_list.append(export_dict)
  return groundtruth_list


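# Illustrative usage of ExportSingleImageGroundtruthToCoco (all values below are
# made-up example data, not from the original module): a single 4x4 image with
# one groundtruth box/mask of class 1.
#   gt_boxes = np.array([[0., 0., 2., 2.]], dtype=np.float32)  # [ymin, xmin, ymax, xmax]
#   gt_classes = np.array([1], dtype=np.int32)
#   gt_masks = np.zeros((1, 4, 4), dtype=np.uint8)
#   gt_masks[0, :2, :2] = 1
#   annotations = ExportSingleImageGroundtruthToCoco(
#       image_id=1, next_annotation_id=1, category_id_set={1},
#       groundtruth_boxes=gt_boxes, groundtruth_classes=gt_classes,
#       groundtruth_masks=gt_masks)
#   # annotations[0]['bbox'] == [0.0, 0.0, 2.0, 2.0]; annotations[0]['area'] == 4.0

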
def ExportSingleImageDetectionMasksToCoco(
    image_id: Union[int, str], category_id_set: Collection[int],
    detection_masks: np.ndarray, detection_scores: np.ndarray,
    detection_classes: np.ndarray) -> List[Dict[str, Any]]:
"""Exports detection masks of a single image to COCO format. |
|
|
|
This function converts detections represented as numpy arrays to dictionaries |
|
that can be ingested by the COCO evaluation API. We assume that |
|
detection_masks, detection_scores, and detection_classes are in correspondence |
|
- that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] |
|
are associated with the same annotation. |
|
|
|
Args: |
|
image_id: unique image identifier either of type integer or string. |
|
category_id_set: A set of valid class ids. Detections with classes not in |
|
category_id_set are dropped. |
|
detection_masks: uint8 numpy array of shape [num_detections, image_height, |
|
image_width] containing detection_masks. |
|
detection_scores: float numpy array of shape [num_detections] containing |
|
scores for detection masks. |
|
detection_classes: integer numpy array of shape [num_detections] containing |
|
the classes for detection masks. |
|
|
|
Returns: |
|
a list of detection mask annotations for a single image in the COCO format. |
|
|
|
Raises: |
|
ValueError: if (1) detection_masks, detection_scores and detection_classes |
|
do not have the right lengths or (2) if each of the elements inside these |
|
lists do not have the correct shapes or (3) if image_ids are not integers. |
|
""" |
|
|
|
  if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1:
    raise ValueError('All entries in detection_classes and detection_scores '
                     'are expected to be of rank 1.')
  num_boxes = detection_classes.shape[0]
  if not num_boxes == len(detection_masks) == detection_scores.shape[0]:
    raise ValueError('Corresponding entries in detection_classes, '
                     'detection_scores and detection_masks should have '
                     'compatible lengths and shapes. '
                     'Classes length: %d. Masks length: %d. '
                     'Scores length: %d' % (
                         detection_classes.shape[0], len(detection_masks),
                         detection_scores.shape[0]))
  detections_list = []
  for i in range(num_boxes):
    if detection_classes[i] in category_id_set:
      detections_list.append({
          'image_id': image_id,
          'category_id': int(detection_classes[i]),
          'segmentation': mask.encode(np.asfortranarray(detection_masks[i])),
          'score': float(detection_scores[i])
      })
  return detections_list
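

# Illustrative end-to-end sketch (assumed data, not part of the original
# module): the exported groundtruth and detection dicts can be fed directly to
# pycocotools for evaluation. `groundtruth_dict` below is assumed to have been
# assembled by the caller as a standard COCO-style dataset dict with
# 'annotations' (from ExportSingleImageGroundtruthToCoco), 'images', and
# 'categories' keys; `detections_list` is the output of
# ExportSingleImageDetectionMasksToCoco.
#
#   from pycocotools.coco import COCO
#   from pycocotools.cocoeval import COCOeval
#
#   coco_gt = COCO()
#   coco_gt.dataset = groundtruth_dict
#   coco_gt.createIndex()
#   coco_dt = coco_gt.loadRes(detections_list)
#   evaluator = COCOeval(coco_gt, coco_dt, iouType='segm')
#   evaluator.evaluate()
#   evaluator.accumulate()
#   evaluator.summarize()
#   metrics = {name: evaluator.stats[i] for name, i in COCO_METRIC_NAMES_AND_INDEX}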