import copy
import warnings
from collections import OrderedDict
from typing import List, Optional, Sequence, Union

import numpy as np
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger

from mmdet.registry import METRICS
from ..functional import eval_map, eval_recalls


@METRICS.register_module()
class VOCMetric(BaseMetric):
    """Pascal VOC evaluation metric.

    Args:
        iou_thrs (float or List[float]): IoU threshold. Defaults to 0.5.
        scale_ranges (List[tuple], optional): Scale ranges for evaluating
            mAP. If not specified, all bounding boxes would be included in
            evaluation. Defaults to None.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'mAP' and 'recall'. If a list is given, only the first metric
            in the list will be evaluated. Defaults to 'mAP'.
        proposal_nums (Sequence[int]): Proposal numbers used for evaluating
            recalls, such as recall@100 and recall@1000.
            Defaults to (100, 300, 1000).
        eval_mode (str): 'area' or '11points'. 'area' means calculating the
            area under the precision-recall curve, while '11points' means
            calculating the average precision of recalls at
            [0, 0.1, ..., 1]. PASCAL VOC2007 uses '11points' by default,
            while PASCAL VOC2012 uses 'area'. Defaults to '11points'.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        prefix (str, optional): The prefix that will be added to the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, ``self.default_prefix``
            will be used instead. Defaults to None.
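
    Examples:
        A minimal, illustrative configuration sketch; the ``val_evaluator``
        field name follows the usual MMEngine convention and is not defined
        in this module::

            val_evaluator = dict(
                type='VOCMetric',
                metric='mAP',
                iou_thrs=0.5,
                eval_mode='11points')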
    """

    default_prefix: Optional[str] = 'pascal_voc'

    def __init__(self,
                 iou_thrs: Union[float, List[float]] = 0.5,
                 scale_ranges: Optional[List[tuple]] = None,
                 metric: Union[str, List[str]] = 'mAP',
                 proposal_nums: Sequence[int] = (100, 300, 1000),
                 eval_mode: str = '11points',
                 collect_device: str = 'cpu',
                 prefix: Optional[str] = None) -> None:
        super().__init__(collect_device=collect_device, prefix=prefix)
        self.iou_thrs = [iou_thrs] if isinstance(iou_thrs, float) \
            else iou_thrs
        self.scale_ranges = scale_ranges

        if not isinstance(metric, str):
            assert len(metric) == 1
            metric = metric[0]
        allowed_metrics = ['recall', 'mAP']
        if metric not in allowed_metrics:
            raise KeyError(
                f"metric should be one of 'recall', 'mAP', but got {metric}.")
        self.metric = metric
        self.proposal_nums = proposal_nums
        assert eval_mode in ['area', '11points'], \
            'Unrecognized mode, only "area" and "11points" are supported'
        self.eval_mode = eval_mode

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Process one batch of data samples and predictions. The processed
        results should be stored in ``self.results``, which will be used to
        compute the metrics when all batches have been processed.

        Args:
            data_batch (dict): A batch of data from the dataloader.
            data_samples (Sequence[dict]): A batch of data samples that
                contain annotations and predictions.
        """
        for data_sample in data_samples:
            gt = copy.deepcopy(data_sample)

            gt_instances = gt['gt_instances']
            gt_ignore_instances = gt['ignored_instances']
            ann = dict(
                labels=gt_instances['labels'].cpu().numpy(),
                bboxes=gt_instances['bboxes'].cpu().numpy(),
                bboxes_ignore=gt_ignore_instances['bboxes'].cpu().numpy(),
                labels_ignore=gt_ignore_instances['labels'].cpu().numpy())

            pred = data_sample['pred_instances']
            pred_bboxes = pred['bboxes'].cpu().numpy()
            pred_scores = pred['scores'].cpu().numpy()
            pred_labels = pred['labels'].cpu().numpy()

            # Group detections by class: stack each class's boxes with their
            # scores into an (N, 5) array of [x1, y1, x2, y2, score] rows.
            dets = []
            for label in range(len(self.dataset_meta['classes'])):
                index = np.where(pred_labels == label)[0]
                pred_bbox_scores = np.hstack(
                    [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
                dets.append(pred_bbox_scores)
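
            # Each stored result is a pair ``(ann, dets)``: ``ann`` maps the
            # ground-truth fields to numpy arrays, and ``dets`` holds one
            # (N, 5) array per class, in ``dataset_meta['classes']`` order.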
            self.results.append((ann, dets))

    def compute_metrics(self, results: list) -> dict:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            dict: The computed metrics. The keys are the names of the metrics,
            and the values are corresponding results.
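
            For example, with ``metric='mAP'`` and ``iou_thrs=0.5`` the
            returned dict contains the keys ``'mAP'`` and ``'AP50'``.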
        """
        logger: MMLogger = MMLogger.get_current_instance()
        gts, preds = zip(*results)
        eval_results = OrderedDict()
        if self.metric == 'mAP':
            assert isinstance(self.iou_thrs, list)
            dataset_type = self.dataset_meta.get('dataset_type')
            if dataset_type in ['VOC2007', 'VOC2012']:
                dataset_name = 'voc'
                if dataset_type == 'VOC2007' and self.eval_mode != '11points':
                    warnings.warn('Pascal VOC2007 uses `11points` as the '
                                  'default evaluation mode, but you are '
                                  f'using {self.eval_mode}.')
                elif dataset_type == 'VOC2012' and self.eval_mode != 'area':
                    warnings.warn('Pascal VOC2012 uses `area` as the '
                                  'default evaluation mode, but you are '
                                  f'using {self.eval_mode}.')
            else:
                dataset_name = self.dataset_meta['classes']

            mean_aps = []
            for iou_thr in self.iou_thrs:
                logger.info(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}')
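                # ``eval_map`` consumes what ``process`` stored: per-image,
                # per-class detection arrays (``preds``) and ground-truth
                # dicts (``gts``). ``use_legacy_coordinate=True`` keeps the
                # VOC devkit convention of computing box widths/heights as
                # x2 - x1 + 1 and y2 - y1 + 1.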
                mean_ap, _ = eval_map(
                    preds,
                    gts,
                    scale_ranges=self.scale_ranges,
                    iou_thr=iou_thr,
                    dataset=dataset_name,
                    logger=logger,
                    eval_mode=self.eval_mode,
                    use_legacy_coordinate=True)
                mean_aps.append(mean_ap)
                eval_results[f'AP{int(iou_thr * 100):02d}'] = round(mean_ap, 3)
            eval_results['mAP'] = sum(mean_aps) / len(mean_aps)
            eval_results.move_to_end('mAP', last=False)
        elif self.metric == 'recall':
            # The detections stored for the first class are used as the
            # class-agnostic proposal set for recall evaluation.
            gt_bboxes = [gt['bboxes'] for gt in gts]
            pr_bboxes = [pred[0] for pred in preds]
            recalls = eval_recalls(
                gt_bboxes,
                pr_bboxes,
                self.proposal_nums,
                self.iou_thrs,
                logger=logger,
                use_legacy_coordinate=True)
            for i, num in enumerate(self.proposal_nums):
                for j, iou_thr in enumerate(self.iou_thrs):
                    eval_results[f'recall@{num}@{iou_thr}'] = recalls[i, j]
            # ``recalls`` has shape (len(proposal_nums), len(iou_thrs)).
            if recalls.shape[1] > 1:
                ar = recalls.mean(axis=1)
                for i, num in enumerate(self.proposal_nums):
                    eval_results[f'AR@{num}'] = ar[i]
        return eval_results