Shortcuts

Source code for mmeval.metrics.voc_map

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import warnings
from multiprocessing.pool import Pool
from typing import Dict, List, Optional, Sequence, Tuple, Union

from mmeval.core.base_metric import BaseMetric
from mmeval.metrics.utils import calculate_bboxes_area, calculate_overlaps
from mmeval.utils import is_list_of


def calculate_average_precision(recalls: np.ndarray,
                                precisions: np.ndarray,
                                mode: str = 'area') -> float:
    """Compute the average precision of one precision-recall curve.

    Args:
        recalls (ndarray): Recall values with shape (num_dets, ).
        precisions (ndarray): Precision values with shape (num_dets, ).
        mode (str): Either 'area' or '11points'. 'area' integrates the
            precision-recall curve after making the precision monotonically
            non-increasing. '11points' averages, over the recall thresholds
            [0, 0.1, ..., 1.0], the best precision reached at a recall not
            lower than the threshold. Defaults to 'area'.

    Returns:
        float: The average precision.
    """
    assert mode in ['area', '11points']

    if mode == '11points':
        total = 0.0
        # The `1e-3` slack makes sure the last threshold (1.0) is generated.
        for recall_thr in np.arange(0, 1 + 1e-3, 0.1):
            candidates = precisions[recalls >= recall_thr]
            # A threshold that no detection reaches contributes zero.
            total += candidates.max() if candidates.size > 0 else 0
        return total / 11.0

    # Pad the curve so that it spans the whole recall range [0, 1].
    padded_recalls = np.hstack((0, recalls, 1))
    padded_precisions = np.hstack((0, precisions, 0))
    # Precision envelope: every entry becomes the maximum of itself and all
    # entries to its right, i.e. a running maximum taken from the right end.
    envelope = np.maximum.accumulate(padded_precisions[::-1])[::-1]
    # Only positions where the recall actually changes add area.
    steps = np.where(padded_recalls[1:] != padded_recalls[:-1])[0]
    widths = padded_recalls[steps + 1] - padded_recalls[steps]
    return np.sum(widths * envelope[steps + 1])


def filter_by_bboxes_area(bboxes: np.ndarray,
                          min_area: Optional[float],
                          max_area: Optional[float],
                          use_legacy_coordinate=False) -> np.ndarray:
    """Build a boolean mask selecting the bboxes inside an area range.

    The range is half-open: a bbox is kept when
    ``min_area <= area < max_area``. A bound given as None is not applied.

    Args:
        bboxes (numpy.ndarray): The bboxes with shape (n, 4) in 'xyxy' format.
        min_area (Optional[float]): Inclusive lower bound of the area. If
            None, there is no lower bound.
        max_area (Optional[float]): Exclusive upper bound of the area. If
            None, there is no upper bound.
        use_legacy_coordinate (bool): Forwarded to ``calculate_bboxes_area``.
            It selects the mmdet v1.x coordinate system, in which width and
            height are calculated as 'x2 - x1 + 1' and 'y2 - y1 + 1'
            respectively. Note that when this function is used for
            `VOCDataset`, it should be True to align with the official
            implementation
            `http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar`
            Defaults to False.

    Returns:
        numpy.ndarray: A boolean mask with one entry per bbox, True for the
        bboxes whose area lies inside the range.
    """
    areas = calculate_bboxes_area(bboxes, use_legacy_coordinate)
    # Start from "keep everything" and narrow down with each given bound.
    keep = np.ones_like(areas, dtype=bool)
    if max_area is not None:
        keep &= (areas < max_area)
    if min_area is not None:
        keep &= (areas >= min_area)
    return keep


class VOCMeanAP(BaseMetric):
    """Pascal VOC evaluation metric.

    This metric computes the VOC mAP (mean Average Precision) with the given
    IoU thresholds and scale ranges.

    Args:
        iou_thrs (float | List[float]): IoU thresholds. Defaults to 0.5.
        scale_ranges (List[tuple], optional): Scale ranges for evaluating
            mAP. If not specified, all bounding boxes would be included in
            evaluation. The ``(None, None)`` range (all scales) is always
            evaluated; it is added to an internal copy, the passed list is
            left unmodified. Defaults to None.
        num_classes (int, optional): The number of classes. If None, it will
            be obtained from the 'classes' field in ``self.dataset_meta``.
            Defaults to None.
        eval_mode (str): 'area' or '11points', 'area' means calculating the
            area under precision-recall curve, '11points' means calculating
            the average precision of recalls at [0, 0.1, ..., 1].
            The PASCAL VOC2007 defaults to use '11points', while PASCAL
            VOC2012 defaults to use 'area'. Defaults to 'area'.
        use_legacy_coordinate (bool): Whether to use coordinate system in
            mmdet v1.x. which means width, height should be calculated as
            'x2 - x1 + 1' and 'y2 - y1 + 1' respectively. The flag is applied
            to the IoU computation and to every area-range filter.
            Defaults to False.
        nproc (int): Processes used for computing TP and FP. If nproc is less
            than or equal to 1, multiprocessing will not be used.
            Defaults to 4.
        drop_class_ap (bool): Whether to drop the class without ground truth
            when calculating the average precision for each class.
            Defaults to True.
        classwise (bool): Whether to return the computed results of each
            class. Defaults to False.
        **kwargs: Keyword parameters passed to :class:`BaseMetric`.

    Examples:
        >>> import numpy as np
        >>> from mmeval import VOCMeanAP
        >>> num_classes = 4
        >>> voc_map = VOCMeanAP(num_classes=4)
        >>>
        >>> def _gen_bboxes(num_bboxes, img_w=256, img_h=256):
        ...     # Randomly generate bounding boxes in 'xyxy' format.
        ...     x = np.random.rand(num_bboxes, ) * img_w
        ...     y = np.random.rand(num_bboxes, ) * img_h
        ...     w = np.random.rand(num_bboxes, ) * (img_w - x)
        ...     h = np.random.rand(num_bboxes, ) * (img_h - y)
        ...     return np.stack([x, y, x + w, y + h], axis=1)
        >>>
        >>> prediction = {
        ...     'bboxes': _gen_bboxes(10),
        ...     'scores': np.random.rand(10, ),
        ...     'labels': np.random.randint(0, num_classes, size=(10, ))
        ... }
        >>> groundtruth = {
        ...     'bboxes': _gen_bboxes(10),
        ...     'labels': np.random.randint(0, num_classes, size=(10, )),
        ...     'bboxes_ignore': _gen_bboxes(5),
        ...     'labels_ignore': np.random.randint(0, num_classes, size=(5, ))
        ... }
        >>> voc_map(predictions=[prediction, ], groundtruths=[groundtruth, ])  # doctest: +ELLIPSIS  # noqa: E501
        {'AP50': ..., 'mAP': ...}
    """

    def __init__(self,
                 iou_thrs: Union[float, List[float]] = 0.5,
                 scale_ranges: Optional[List[Tuple]] = None,
                 num_classes: Optional[int] = None,
                 eval_mode: str = 'area',
                 use_legacy_coordinate: bool = False,
                 nproc: int = 4,
                 drop_class_ap: bool = True,
                 classwise: bool = False,
                 **kwargs) -> None:
        super().__init__(**kwargs)

        if isinstance(iou_thrs, float):
            iou_thrs = [iou_thrs]
        assert is_list_of(iou_thrs, float), \
            '`iou_thrs` should be float or a list of float'
        self.iou_thrs = iou_thrs

        if scale_ranges is None:
            scale_ranges = [(None, None)]
        else:
            # Work on a copy so that appending below never alters the list
            # object owned by the caller.
            scale_ranges = list(scale_ranges)
            if (None, None) not in scale_ranges:
                # We always compute the mAP across all scales.
                scale_ranges.append((None, None))
        self.scale_ranges = scale_ranges

        # A scale is a side length, the matching area bound is its square.
        area_ranges = []
        for min_scale, max_scale in self.scale_ranges:
            min_area = min_scale if min_scale is None else min_scale**2
            max_area = max_scale if max_scale is None else max_scale**2
            area_ranges.append((min_area, max_area))
        self._area_ranges = area_ranges

        self._num_classes = num_classes

        assert eval_mode in ['area', '11points'], \
            'Unrecognized mode, only "area" and "11points" are supported'
        self.eval_mode = eval_mode

        self.nproc = nproc
        self.use_legacy_coordinate = use_legacy_coordinate
        self.drop_class_ap = drop_class_ap
        self.classwise = classwise

        self.num_iou = len(self.iou_thrs)
        self.num_scale = len(self.scale_ranges)

    @property
    def num_classes(self) -> int:
        """Returns the number of classes.

        The number of classes should be set during initialization, otherwise
        it will be obtained from the 'classes' field in ``self.dataset_meta``.
        The deprecated 'CLASSES' field is still accepted, with a
        ``DeprecationWarning``. The value found is cached.

        Returns:
            int: The number of classes.

        Raises:
            RuntimeError: If the num_classes is not set.
        """
        if self._num_classes is not None:
            return self._num_classes

        if self.dataset_meta and 'classes' in self.dataset_meta:
            self._num_classes = len(self.dataset_meta['classes'])
        elif self.dataset_meta and 'CLASSES' in self.dataset_meta:
            self._num_classes = len(self.dataset_meta['CLASSES'])
            warnings.warn(
                'The `CLASSES` in `dataset_meta` is deprecated, '
                'use `classes` instead!', DeprecationWarning)
        else:
            raise RuntimeError(
                "The `num_claases` is required, and also not found 'classes' "
                f'in dataset_meta: {self.dataset_meta}')
        return self._num_classes

    def add(self, predictions: Sequence[Dict], groundtruths: Sequence[Dict]) -> None:  # type: ignore # yapf: disable # noqa: E501
        """Add the intermediate results to ``self._results``.

        Args:
            predictions (Sequence[dict]): A sequence of dict. Each dict
                represents the detection result of an image, with the
                following keys:

                - bboxes (numpy.ndarray): Shape (N, 4), the predicted
                  bounding bboxes of this image, in 'xyxy' format.
                - scores (numpy.ndarray): Shape (N, ), the predicted scores
                  of bounding boxes.
                - labels (numpy.ndarray): Shape (N, ), the predicted labels
                  of bounding boxes.

            groundtruths (Sequence[dict]): A sequence of dict. Each dict
                represents the ground truths of an image, with the following
                keys:

                - bboxes (numpy.ndarray): Shape (M, 4), the ground truth
                  bounding bboxes of this image, in 'xyxy' format.
                - labels (numpy.ndarray): Shape (M, ), the ground truth
                  labels of bounding boxes.
                - bboxes_ignore (numpy.ndarray): Shape (K, 4), the ground
                  truth ignored bounding bboxes of this image,
                  in 'xyxy' format.
                - labels_ignore (numpy.ndarray): Shape (K, ), the ground
                  truth ignored labels of bounding boxes.
        """
        for prediction, groundtruth in zip(predictions, groundtruths):
            assert isinstance(prediction, dict), 'The prediciton should be ' \
                f'a sequence of dict, but got a sequence of {type(prediction)}.'  # noqa: E501
            assert isinstance(groundtruth, dict), 'The label should be ' \
                f'a sequence of dict, but got a sequence of {type(groundtruth)}.'  # noqa: E501
            self._results.append((prediction, groundtruth))

    @staticmethod
    def _calculate_image_tpfp(
            pred_bboxes: np.ndarray, gt_bboxes: np.ndarray,
            ignore_gt_bboxes: np.ndarray, iou_thrs: List[float],
            area_ranges: List[Tuple[Optional[float], Optional[float]]],
            use_legacy_coordinate: bool) -> Tuple[np.ndarray, np.ndarray]:
        """Calculate the true positive and false positive on an image.

        Args:
            pred_bboxes (numpy.ndarray): Predicted bboxes of this image, with
                shape (N, 5). The predicted score of the bbox is concatenated
                behind the predicted bbox.
            gt_bboxes (numpy.ndarray): Ground truth bboxes of this image, with
                shape (M, 4).
            ignore_gt_bboxes (numpy.ndarray): Ground truth ignored bboxes of
                this image, with shape (K, 4).
            iou_thrs (List[float]): The IoU thresholds.
            area_ranges (List[Tuple]): The area ranges.
            use_legacy_coordinate (bool): Refer to :class:`VOCMeanAP`. It is
                used for the IoU computation as well as for every area
                filter in this method.

        Returns:
            tuple (tp, fp):

            - tp (numpy.ndarray): Shape (num_ious, num_scales, N),
              the true positive flag of each predicted bbox on this image.
            - fp (numpy.ndarray): Shape (num_ious, num_scales, N),
              the false positive flag of each predicted bbox on this image.

        Note:
            This method should be a staticmethod to avoid resource competition
            during multiple processes.
        """
        # Step 1. Concatenate `gt_bboxes` and `ignore_gt_bboxes`, then set
        # the `ignore_gt_flags`.
        all_gt_bboxes = np.concatenate((gt_bboxes, ignore_gt_bboxes))
        ignore_gt_flags = np.concatenate(
            (np.zeros(gt_bboxes.shape[0], dtype=bool),
             np.ones(ignore_gt_bboxes.shape[0], dtype=bool)))

        # Step 2. Initialize the `tp` and `fp` arrays.
        num_preds = pred_bboxes.shape[0]
        tp = np.zeros((len(iou_thrs), len(area_ranges), num_preds))
        fp = np.zeros((len(iou_thrs), len(area_ranges), num_preds))

        # Whether each predicted bbox lies inside each area range. This does
        # not depend on the IoU threshold, so it is computed once per range.
        pred_area_masks = [
            filter_by_bboxes_area(pred_bboxes[:, :4], min_area, max_area,
                                  use_legacy_coordinate)
            for min_area, max_area in area_ranges
        ]

        # Step 3. If there are no gt bboxes in this image, then all pred
        # bboxes within area range are false positives.
        if all_gt_bboxes.shape[0] == 0:
            for area_idx, pred_area_mask in enumerate(pred_area_masks):
                fp[:, area_idx, pred_area_mask] = 1
            return tp, fp

        # Step 4. Calculate the IoUs between the predicted bboxes and the
        # ground truth bboxes.
        ious = calculate_overlaps(
            pred_bboxes[:, :4],
            all_gt_bboxes,
            mode='iou',
            use_legacy_coordinate=use_legacy_coordinate)
        # For each pred bbox, the max iou with all gts.
        ious_max = ious.max(axis=1)
        # For each pred bbox, which gt overlaps most with it.
        ious_argmax = ious.argmax(axis=1)
        # Sort all pred bbox in descending order by scores.
        sorted_indices = np.argsort(-pred_bboxes[:, -1])

        # Step 5. Count the `tp` and `fp` of each iou threshold and area
        # range. Every (iou threshold, area range) cell is independent.
        for area_idx, (min_area, max_area) in enumerate(area_ranges):
            # The flags that gt bboxes are out of the area range.
            ignore_gt_area_flags = ~filter_by_bboxes_area(
                all_gt_bboxes, min_area, max_area, use_legacy_coordinate)
            pred_area_mask = pred_area_masks[area_idx]

            for iou_thr_idx, iou_thr in enumerate(iou_thrs):
                # The flags that gt bboxes have been matched.
                gt_covered_flags = np.zeros(
                    all_gt_bboxes.shape[0], dtype=bool)

                # Count the prediction bboxes in order of decreasing score.
                for pred_bbox_idx in sorted_indices:
                    if ious_max[pred_bbox_idx] >= iou_thr:
                        matched_gt_idx = ious_argmax[pred_bbox_idx]
                        # Ignore the pred bbox that matches an ignored gt
                        # bbox.
                        if ignore_gt_flags[matched_gt_idx]:
                            continue
                        # Ignore the pred bbox whose matched gt bbox is out
                        # of area range.
                        if ignore_gt_area_flags[matched_gt_idx]:
                            continue
                        if not gt_covered_flags[matched_gt_idx]:
                            tp[iou_thr_idx, area_idx, pred_bbox_idx] = 1
                            gt_covered_flags[matched_gt_idx] = True
                        else:
                            # This gt bbox has already been matched by a
                            # higher-scored pred bbox, so count this pred
                            # bbox as fp.
                            fp[iou_thr_idx, area_idx, pred_bbox_idx] = 1
                    elif pred_area_mask[pred_bbox_idx]:
                        # Unmatched pred bbox inside the area range.
                        fp[iou_thr_idx, area_idx, pred_bbox_idx] = 1

        return tp, fp

    def get_class_predictions(self, predictions: List[dict],
                              class_index: int) -> List:
        """Get prediction results of a certain class index.

        Args:
            predictions (list[dict]): Same as :class:`VOCMeanAP.add`.
            class_index (int): Index of a specific class.

        Returns:
            list[np.ndarray]: A list of predicted bboxes of this class, one
            array of shape (n, 5) per image. Each predicted score of the bbox
            is concatenated behind the predicted bbox.
        """
        class_preds = []
        for pred in predictions:
            pred_indices = (pred['labels'] == class_index)
            pred_bboxes_info = np.concatenate(
                (pred['bboxes'][pred_indices, :],
                 pred['scores'][pred_indices].reshape((-1, 1))),
                axis=1)
            class_preds.append(pred_bboxes_info)
        return class_preds

    def get_class_gts(self, groundtruths: List[dict],
                      class_index: int) -> Tuple:
        """Get ground truth information of a certain class index.

        Args:
            groundtruths (list[dict]): Same as :class:`VOCMeanAP.add`.
            class_index (int): Index of a specific class.

        Returns:
            tuple (class_gts, class_ignore_gts):

            - class_gts (List[numpy.ndarray]): The gt bboxes of this class,
              one array per image.
            - class_ignore_gts (List[numpy.ndarray]): The ignored gt bboxes of
              this class, one array per image. This is necessary when
              counting tp and fp.
        """
        class_gts = []
        class_ignore_gts = []
        for gt in groundtruths:
            gt_indices = (gt['labels'] == class_index)
            class_gts.append(gt['bboxes'][gt_indices, :])

            ignore_gt_indices = (gt['labels_ignore'] == class_index)
            class_ignore_gts.append(
                gt['bboxes_ignore'][ignore_gt_indices, :])
        return class_gts, class_ignore_gts

    def calculate_class_tpfp(self, predictions: List[dict],
                             groundtruths: List[dict], class_index: int,
                             pool: Optional[Pool]) -> Tuple:
        """Calculate the tp and fp of the given class index.

        Args:
            predictions (List[dict]): A list of dict. Each dict is the
                detection result of an image. Same as
                :class:`VOCMeanAP.add`.
            groundtruths (List[dict]): A list of dict. Each dict is the
                ground truth of an image. Same as :class:`VOCMeanAP.add`.
            class_index (int): The class index.
            pool (Optional[Pool]): An instance of
                :class:`multiprocessing.Pool`. If None, do not use
                multiprocessing.

        Returns:
            tuple (tp, fp, num_gts):

            - tp (numpy.ndarray): Shape (num_ious, num_scales, num_pred),
              the true positive flag of each predicted bbox for this class,
              ordered by decreasing score over all images.
            - fp (numpy.ndarray): Shape (num_ious, num_scales, num_pred),
              the false positive flag of each predicted bbox for this class,
              in the same order as ``tp``.
            - num_gts (numpy.ndarray): Shape (num_ious, num_scales), the
              number of (non-ignored) ground truths inside each area range.
        """
        class_preds = self.get_class_predictions(predictions, class_index)
        class_gts, class_ignore_gts = self.get_class_gts(
            groundtruths, class_index)

        if pool is not None:
            num_images = len(class_preds)
            tpfp_list = pool.starmap(
                self._calculate_image_tpfp,
                zip(class_preds, class_gts, class_ignore_gts,
                    [self.iou_thrs] * num_images,
                    [self._area_ranges] * num_images,
                    [self.use_legacy_coordinate] * num_images))
        else:
            tpfp_list = [
                self._calculate_image_tpfp(image_preds, image_gts,
                                           image_ignore_gts, self.iou_thrs,
                                           self._area_ranges,
                                           self.use_legacy_coordinate)
                for image_preds, image_gts, image_ignore_gts in zip(
                    class_preds, class_gts, class_ignore_gts)
            ]

        image_tp_list, image_fp_list = tuple(zip(*tpfp_list))
        # Rank the detections of all images together by decreasing score;
        # the score is the last column of every prediction array.
        sorted_indices = np.argsort(-np.vstack(class_preds)[:, -1])
        tp = np.concatenate(image_tp_list, axis=2)[..., sorted_indices]
        fp = np.concatenate(image_fp_list, axis=2)[..., sorted_indices]

        # The number of ground truths only depends on the area range, so the
        # same count is written for every IoU threshold.
        all_class_gts = np.vstack(class_gts)
        num_gts = np.zeros((self.num_iou, self.num_scale), dtype=int)
        for idx, (min_area, max_area) in enumerate(self._area_ranges):
            area_mask = self._filter_by_bboxes_area(all_class_gts, min_area,
                                                    max_area)
            num_gts[:, idx] = np.sum(area_mask)

        return tp, fp, num_gts

    def compute_metric(self, results: list) -> dict:
        """Compute the VOCMeanAP metric.

        Args:
            results (List[tuple]): A list of tuple. Each tuple is the
                prediction and ground truth of an image. This list has already
                been synced across all ranks.

        Returns:
            dict: The computed metric, with the following keys:

            - mAP, the averaged across all IoU thresholds and all class.
            - AP{IoU}, the mAP of the specified IoU threshold.
            - mAP@{scale_range}, the mAP of the specified scale range.
            - classwise_result, the evaluation results of each class.
              This would be returned if ``self.classwise`` is True.
        """
        predictions, groundtruths = zip(*results)

        # Never start more worker processes than there are images.
        nproc = min(self.nproc, len(predictions))
        pool: Optional[Pool] = Pool(nproc) if nproc > 1 else None

        results_per_class = []
        try:
            for class_index in range(self.num_classes):
                # Calculate tp, fp and num_gts.
                tp, fp, num_gts = self.calculate_class_tpfp(
                    predictions, groundtruths, class_index, pool)

                # Calculate recalls and precisions. `eps` guards the
                # divisions against a zero denominator.
                tp_cumsum = np.cumsum(tp, axis=2)
                fp_cumsum = np.cumsum(fp, axis=2)
                eps = np.finfo(np.float32).eps
                precisions = tp_cumsum / np.maximum(
                    (tp_cumsum + fp_cumsum), eps)
                recalls = tp_cumsum / np.maximum(num_gts[..., np.newaxis],
                                                 eps)

                # Calculate average precision per `iou_thr` and
                # `scale_range`.
                ap = np.zeros((self.num_iou, self.num_scale),
                              dtype=np.float32)
                for i in range(self.num_iou):
                    for j in range(self.num_scale):
                        ap[i, j] = calculate_average_precision(
                            recalls[i, j], precisions[i, j], self.eval_mode)

                results_per_class.append({
                    'num_gts': num_gts,
                    'num_dets': tp.shape[-1],
                    'recalls': recalls,
                    'precisions': precisions,
                    'ap': ap,
                })
        finally:
            # Release the worker processes even when a class fails, and wait
            # for them to exit so that no process outlives this call.
            if pool is not None:
                pool.close()
                pool.join()

        eval_results = self._aggregate_results(results_per_class)
        if self.classwise:
            eval_results['classwise_result'] = results_per_class
        return eval_results

    def _aggregate_results(self, results_per_class: List[dict]) -> dict:
        """Aggregate class-wise results and return a dictionary.

        Args:
            results_per_class (List[dict]): The class-wise evaluate results.

        Returns:
            dict: The aggregated metric results, with the following keys:

            - mAP, the averaged across all IoU thresholds and all class.
            - AP{IoU}, the mAP of the specified IoU threshold.
            - mAP@{scale_range}, the mAP of the specified scale range.

        Note:
            When ``self.drop_class_ap`` is True and no class has a ground
            truth for a given cell, the mean is taken over an empty list and
            the resulting value is NaN.
        """

        def _mean_ap_over_classes(iou_idx: int, scale_idx: int) -> float:
            # Mean AP over the classes of one (IoU threshold, scale range)
            # cell; classes without ground truth are skipped when
            # `drop_class_ap` is set.
            aps = [
                res['ap'][iou_idx][scale_idx] for res in results_per_class
                if res['num_gts'][iou_idx][scale_idx] > 0
                or not self.drop_class_ap
            ]
            return np.array(aps).mean().item()

        eval_results = {}

        # Calculate `AP{iou_thr}` (while scale_range is None) for each
        # `iou_thrs`.
        for i, iou_thr in enumerate(self.iou_thrs):
            for j, scale_range in enumerate(self.scale_ranges):
                if scale_range != (None, None):
                    continue
                eval_results[f'AP{round(iou_thr * 100)}'] = \
                    _mean_ap_over_classes(i, j)

        # Calculate `mAP@{scale_range}` and `mAP` (while scale_range is None)
        # overall `iou_thrs`.
        for j, scale_range in enumerate(self.scale_ranges):
            ap_per_ious = [
                _mean_ap_over_classes(i, j)
                for i in range(len(self.iou_thrs))
            ]

            if scale_range == (None, None):
                key = 'mAP'
            else:
                key = f'mAP@{scale_range}'

            eval_results[key] = np.array(ap_per_ious).mean().item()

        return eval_results

    def _filter_by_bboxes_area(self, bboxes: np.ndarray,
                               min_area: Optional[float],
                               max_area: Optional[float]):
        """Filter the bboxes with an area range.

        A thin wrapper around :func:`filter_by_bboxes_area` that supplies
        ``self.use_legacy_coordinate``.

        Args:
            bboxes (numpy.ndarray): The bboxes with shape (n, 4) in 'xyxy'
                format.
            min_area (Optional[float]): The minimum area. If None, does not
                filter the minimum area.
            max_area (Optional[float]): The maximum area. If None, does not
                filter the maximum area.

        Returns:
            numpy.ndarray: A mask of ``bboxes`` identifying which bboxes are
            inside the area range.
        """
        return filter_by_bboxes_area(bboxes, min_area, max_area,
                                     self.use_legacy_coordinate)
Read the Docs v: latest
Versions
latest
stable
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.