Source code for mmeval.metrics.oid_map
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import csv
import json
import numpy as np
import warnings
from multiprocessing.pool import Pool
from typing import Dict, List, Optional, Sequence, Tuple, Union
from mmeval.metrics.utils import calculate_overlaps
from .voc_map import VOCMeanAP, filter_by_bboxes_area
def _convert_hierarchy_tree(hierarchy_map: dict,
label_index_map: dict,
relation_matrix: np.ndarray,
parents: list = [],
get_all_parents: bool = True) -> np.ndarray:
"""Get matrix of the corresponding relationship between the parent class
and the child class.
Args:
hierarchy_map (dict): Including label name and corresponding
subcategory. Keys of dicts are:
- `LabeName` (str): Name of the label.
- `Subcategory` (dict | list): Corresponding subcategory(ies).
label_index_map (dict): The mapping of label name to description names.
relation_matrix (ndarray): The matrix of the corresponding
relationship between the parent class and the child class,
of shape (class_num, class_num).
parents (list): Corresponding parent class. Defaults to [].
get_all_parents (bool): Whether get all parent names.
Defaults to True.
Returns:
numpy.ndarray: The matrix of the corresponding relationship between the
parent class and the child class, of shape (class_num, class_num).
"""
if 'Subcategory' not in hierarchy_map:
return relation_matrix
for node in hierarchy_map['Subcategory']:
if 'LabelName' not in node:
continue
children_name = node['LabelName']
children_index = label_index_map[children_name]
children = [children_index]
if len(parents) > 0:
for parent_index in parents:
if get_all_parents:
children.append(parent_index)
relation_matrix[children_index, parent_index] = 1
relation_matrix = _convert_hierarchy_tree(
node, label_index_map, relation_matrix, parents=children)
return relation_matrix
def get_relation_matrix(hierarchy_file: str, label_file: str) -> np.ndarray:
"""Get the matrix of class hierarchy from the hierarchy file.
Hierarchy for 600 classes can be found at https://storage.googleapis.com/
openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html.
Reference: https://github.com/open-mmlab/mmdetection/blob/
9d3e162459590eee4cfc891218dfbb5878378842/mmdet/datasets/openimages.py#L357
Args:
hierarchy_file (str): File path to the hierarchy for classes.
E.g. https://storage.googleapis.com/openimages/2018_04/
bbox_labels_600_hierarchy.json
label_file (str): File path to the mapping of label name to class
description names. E.g. https://storage.googleapis.com/openimages
/2018_04/class-descriptions-boxable.csv
Returns:
np.ndarray: The matrix of the corresponding relationship between
the parent class and the child class, of shape (class_num, class_num).
"""
index_list = []
classes_names = []
with open(label_file) as f:
reader = csv.reader(f)
for line in reader:
classes_names.append(line[1])
index_list.append(line[0])
label_index_map = {index: i for i, index in enumerate(index_list)}
with open(hierarchy_file) as f:
hierarchy_map = json.load(f)
class_num = len(label_index_map)
relation_matrix = np.eye(class_num, class_num)
relation_matrix = _convert_hierarchy_tree(hierarchy_map, label_index_map,
relation_matrix)
return relation_matrix
[docs]class OIDMeanAP(VOCMeanAP):
"""Open Images Dataset detection evaluation metric.
The Open Images Dataset detection evaluation uses a variant of the standard
PASCAL VOC 2010 mean Average Precision (mAP) at IoU > 0.5. There are some
key features of Open Images annotations, which are addressed by the new
metric.
For more see: https://storage.googleapis.com/openimages/web/evaluation.html
Args:
iof_thrs (float | List[float]): IoF thresholds. Defaults to 0.5.
use_group_of (bool): Whether consider group of groud truth
bboxes during evaluating. Defaults to True.
get_supercategory (bool, optional): Whether to get parent class of the
current class. Defaults to True.
filter_labels (bool, optional): Whether filter unannotated classes.
Defaults to True.
class_relation_matrix (numpy.ndarray, optional): The matrix of the
corresponding relationship between the parent class and the child
class. If None, it will be obtained from the 'relation_matrix'
field in ``self.dataset_meta``. Defaults to None.
**kwargs: Keyword parameters passed to :class:`VOCMeanAP`.
Examples:
>>> import numpy as np
>>> from mmeval import OIDMeanAP
>>> num_classes = 4
>>> # use a fake relation_matrix
>>> relation_matrix = np.eye(num_classes, num_classes)
>>> oid_map = OIDMeanAP(
... num_classes=4, class_relation_matrix=relation_matrix)
>>>
>>> def _gen_bboxes(num_bboxes, img_w=256, img_h=256):
... # random generate bounding boxes in 'xyxy' formart.
... x = np.random.rand(num_bboxes, ) * img_w
... y = np.random.rand(num_bboxes, ) * img_h
... w = np.random.rand(num_bboxes, ) * (img_w - x)
... h = np.random.rand(num_bboxes, ) * (img_h - y)
... return np.stack([x, y, x + w, y + h], axis=1)
>>>
>>> prediction = {
... 'bboxes': _gen_bboxes(10),
... 'scores': np.random.rand(10, ),
... 'labels': np.random.randint(0, num_classes, size=(10, ))
... }
>>> instances = []
>>> for bbox in _gen_bboxes(20):
... instances.append({
... 'bbox': bbox.tolist(),
... 'bbox_label': random.randint(0, num_classes - 1),
... 'is_group_of': random.randint(0, 1) == 0,
... })
>>> groundtruth = {
... 'instances': instances,
... 'image_level_labels': np.random.randint(0, num_classes, size=(10, )), # noqa: E501
... }
>>> oid_map(predictions=[prediction, ], groundtruths=[groundtruth, ]) # doctest: +ELLIPSIS # noqa: E501
{'AP@50': ..., 'mAP': ...}
"""
def __init__(self,
iof_thrs: Union[float, List[float]] = 0.5,
use_group_of: bool = True,
get_supercategory: bool = True,
filter_labels: bool = True,
class_relation_matrix: Optional[np.ndarray] = None,
**kwargs) -> None:
super().__init__(**kwargs)
if isinstance(iof_thrs, float):
iof_thrs = [iof_thrs]
self.iof_thrs = iof_thrs
self.num_iof = len(iof_thrs)
assert self.num_iof == self.num_iou, "The IoU and IoF thresholds' \
' should be one-to-one correspondence"
self.use_group_of = use_group_of
self.get_supercategory = get_supercategory
self.filter_labels = filter_labels
self._class_relation_matrix = class_relation_matrix
@property
def class_relation_matrix(self) -> np.ndarray:
"""Returns the class relation matrix.
The class relation matrix should be set during initialization,
otherwise it will be obtained from the 'relation_matrix' field in
``self.dataset_meta``.
Returns:
numpy.ndarray: The class relation matrix.
Raises:
RuntimeError: If the class relation matrix is not set.
"""
if self._class_relation_matrix is not None:
return self._class_relation_matrix
if self.dataset_meta and 'relation_matrix' in self.dataset_meta:
self._class_relation_matrix = self.dataset_meta['relation_matrix']
elif self.dataset_meta and 'RELATION_MATRIX' in self.dataset_meta:
self._class_relation_matrix = self.dataset_meta['RELATION_MATRIX']
warnings.warn(
'The `RELATION_MATRIX` in `dataset_meta` is deprecated, '
'use `relation_matrix` instead!', DeprecationWarning)
else:
raise RuntimeError(
'The `class_relation_matrix` is required, and also not found'
f" 'relation_matrix' in dataset_meta: {self.dataset_meta}")
return self._class_relation_matrix
def _extend_supercategory_ann(self, instances: List[dict]) -> List[dict]:
"""Extend parent classes's annotation of the corresponding class.
Args:
instances (List[dict]): A list of annotations of the instances.
Returns:
List[dict]: Annotations extended with super-category.
"""
supercat_instances = []
relation_matrix = self.class_relation_matrix
for instance in instances:
# Find the super-category by class relation matrix.
supercats = np.where(relation_matrix[instance['bbox_label']])[0]
for supercat in supercats:
if supercat == instance['bbox_label']:
continue
# Copy this instance and change the 'bbox_label'
new_instance = copy.deepcopy(instance)
new_instance['bbox_label'] = supercat
supercat_instances.append(new_instance)
return supercat_instances
def _process_prediction(self, pred: dict,
allowed_labels: np.ndarray) -> dict:
"""Process results of the corresponding label of the predicted bboxes.
It will choose to do the following 2 processing according to
``self.get_supercategory`` and ``self.filter_labels``:
1. Extend the the parent label of the corresponding prediction box.
2. Filter unannotated classes of the corresponding prediction box.
Args:
pred (dict): The predicted detection result for an image, with the
following keys:
- bboxes (numpy.ndarray): Shape (N, 4), the predicted bounding
bboxes of this image, in 'xyxy' foramrt.
- scores (numpy.ndarray): Shape (N, 1), the predicted scores of
bounding boxes.
- labels (numpy.ndarray): Shape (N, 1), the predicted labels of
bounding boxes.
Returns:
pred (dict): The processed predicted detection result.
"""
origin_pred = copy.deepcopy(pred)
relation_matrix = self.class_relation_matrix
for pred_label in np.unique(pred['labels']):
supercat_labels = np.where(relation_matrix[pred_label])[0]
for supercat in supercat_labels:
if (supercat in allowed_labels and supercat != pred_label
and self.get_supercategory):
# add super-supercategory preds
indices = np.where(origin_pred['labels'] == pred_label)[0]
pred['scores'] = np.concatenate(
[pred['scores'], origin_pred['scores'][indices]])
pred['bboxes'] = np.concatenate(
[pred['bboxes'], origin_pred['bboxes'][indices]])
extend_labels = np.full(
indices.shape, supercat, dtype=np.int64) # noqa: E501
pred['labels'] = np.concatenate(
[pred['labels'], extend_labels]) # noqa: E501
elif supercat not in allowed_labels and self.filter_labels:
indices = np.where(pred['labels'] != supercat)[0]
pred['scores'] = pred['scores'][indices]
pred['bboxes'] = pred['bboxes'][indices]
pred['labels'] = pred['labels'][indices]
return pred
def _stack_gt_instances(self,
instances: List[dict]) -> Dict[str, np.ndarray]:
"""Stack the gt instances from a list of dict to a dict.
Args:
instances (List[dict]): List[dict], each dict representing a
bounding box with the following keys:
- bbox (list): Containing box location in 'xyxy' foramrt.
- bbox_label (int): Box label index.
- is_group_of (bool): Whether the box is group or not.
Returns:
dict: The stacked gt instances, with the following keys:
- bboxes (numpy.ndarray): Shape (N, 4).
- labels (numpy.ndarray): Shape (N, ).
- is_group_ofs (numpy.ndarray): Shape (N, ).
"""
gt_labels, gt_bboxes, is_group_ofs = [], [], []
for ins in instances:
gt_labels.append(ins['bbox_label'])
gt_bboxes.append(ins['bbox'])
is_group_ofs.append(ins['is_group_of'])
annotation = {
'bboxes': np.array(gt_bboxes, dtype=np.float32).reshape((-1, 4)),
'labels': np.array(gt_labels, dtype=np.int64),
'is_group_ofs': np.array(is_group_ofs, dtype=bool),
}
return annotation
[docs] def add(self, predictions: Sequence[dict], groundtruths: Sequence[dict]) -> None: # type: ignore # yapf: disable # noqa: E501
"""Add the intermediate results to ``self._results``.
Args:
predictions (Sequence[dict]): A sequence of dict. Each dict
representing a detection result for an image, with the
following keys:
- bboxes (numpy.ndarray): Shape (N, 4), the predicted bounding
bboxes of this image, in 'xyxy' foramrt.
- scores (numpy.ndarray): Shape (N, 1), the predicted scores
of bounding boxes.
- labels (numpy.ndarray): Shape (N, 1), the predicted labels
of bounding boxes.
groundtruths (Sequence[dict]): A sequence of dict. Each dict
representing a groundtruths for an image, with the following
keys:
- instances (List[dict]): Each dict representing a bounding
box with the following keys:
- bbox (list): Containing box location in 'xyxy' foramrt.
- bbox_label (int): Box label index.
- is_group_of (bool): Whether the box is group or not.
- image_level_labels (numpy.ndarray): The image level labels.
"""
for pred, groundtruth in zip(predictions, groundtruths):
assert isinstance(pred, dict), 'The prediciton should be ' \
f'a sequence of dict, but got a sequence of {type(pred)}.'
assert isinstance(groundtruth, dict), 'The groundtruth should ' \
f'be a sequence of dict, but got a sequence of {type(groundtruth)}.' # noqa: E501
instances = copy.deepcopy(groundtruth['instances'])
pred = copy.deepcopy(pred)
if self.get_supercategory:
supercat_instances = self._extend_supercategory_ann(instances)
instances.extend(supercat_instances)
gt_ann = self._stack_gt_instances(instances)
if 'image_level_labels' not in groundtruth:
allowed_labels = np.unique(gt_ann['labels'])
else:
allowed_labels = np.unique(
np.append(gt_ann['labels'],
groundtruth['image_level_labels']))
pred = self._process_prediction(pred, allowed_labels)
self._results.append((pred, gt_ann))
@staticmethod
def _calculate_image_tpfp( # type: ignore
pred_bboxes: np.ndarray, gt_bboxes: np.ndarray,
group_of_flags: np.ndarray, use_group_of: bool,
iof_thrs: List[float], iou_thrs: List[float],
area_ranges: List[Tuple[Optional[float], Optional[float]]],
use_legacy_coordinate: bool) -> Tuple:
"""Calculate the true positive and false positive on an image.
This is an overridden method of :class:`VOCMeanAP`.
Args:
pred_bboxes (numpy.ndarray): Predicted bboxes of this image, with
shape (N, 5). The scores The predicted score of the bbox is
concatenated behind the predicted bbox.
gt_bboxes (numpy.ndarray): Ground truth bboxes of this image, with
shape (M, 4).
group_of_flags (numpy.ndarray): Flags identifies whether the gt box
is group of or not, with shape (M, ).
use_group_of (bool): Whether consider group of groud truth
bboxes during evaluating.
iof_thrs (List[float]): The IoF thresholds.
iou_thrs (List[float]): The IoU thresholds.
area_ranges (List[Tuple]): The area ranges.
use_legacy_coordinate (bool): Refer to :class:`VOCMeanAP`.
Returns:
tuple (tp, fp, pred_bboxes): The tp, fp and modified pred bboxes.
Since some predicted bboxes would be ignored, we should return the
modified predicted bboxes.
- tp (numpy.ndarray): Shape (num_ious, num_scales, N),
the true positive flag of each predicted bbox on this image.
- fp (numpy.ndarray): Shape (num_ious, num_scales, N),
the false positive flag of each predicted bbox on this image.
- pred_bboxes (numpy.ndarray): Shape (K, 5), the modified
pred bboxes.
Note:
This method should be a staticmethod to avoid resource competition
during multiple process.
"""
ignore_gt_bboxes = np.empty((0, 4))
if not use_group_of:
tp, fp = VOCMeanAP._calculate_image_tpfp(pred_bboxes, gt_bboxes,
ignore_gt_bboxes,
iou_thrs, area_ranges,
use_legacy_coordinate)
return tp, fp, pred_bboxes
non_group_gt_bboxes = gt_bboxes[~group_of_flags]
group_gt_bboxes = gt_bboxes[group_of_flags]
tp, fp = VOCMeanAP._calculate_image_tpfp(pred_bboxes,
non_group_gt_bboxes,
ignore_gt_bboxes, iou_thrs,
area_ranges,
use_legacy_coordinate)
if group_gt_bboxes.shape[0] == 0:
return tp, fp, pred_bboxes
iofs = calculate_overlaps(
pred_bboxes[:, :4],
group_gt_bboxes,
mode='iof',
use_legacy_coordinate=use_legacy_coordinate)
num_iou = len(iou_thrs)
num_scale = len(area_ranges)
pred_bboxes_group = np.zeros(
(num_iou, num_scale, iofs.shape[1], pred_bboxes.shape[1]),
dtype=float)
match_group_of = np.zeros((num_iou, num_scale, pred_bboxes.shape[0]),
dtype=bool)
tp_group = np.zeros((num_iou, num_scale, group_gt_bboxes.shape[0]),
dtype=np.float32)
iofs_max = iofs.max(axis=1)
# for each det, which gt overlaps most with it
iofs_argmax = iofs.argmax(axis=1)
# sort all dets in descending order by scores
sorted_indices = np.argsort(-pred_bboxes[:, -1])
for iof_idx, iof_thr in enumerate(iof_thrs):
for area_idx, (min_area, max_area) in enumerate(area_ranges):
box_is_covered = tp[iof_idx, area_idx]
# if no area range is specified, gt_area_ignore is all False
gt_area_mask = filter_by_bboxes_area(gt_bboxes, min_area,
max_area)
ignore_gt_area_flags = ~gt_area_mask
for pred_bbox_idx in sorted_indices:
if box_is_covered[pred_bbox_idx]:
continue
if iofs_max[pred_bbox_idx] < iof_thr:
continue
matched_gt_idx = iofs_argmax[pred_bbox_idx]
if ignore_gt_area_flags[matched_gt_idx]:
continue
if not tp_group[iof_idx, area_idx, matched_gt_idx]:
tp_group[iof_idx, area_idx, matched_gt_idx] = 1
match_group_of[iof_idx, area_idx, pred_bbox_idx] = True
else:
match_group_of[iof_idx, area_idx, pred_bbox_idx] = True
if pred_bboxes_group[iof_idx, area_idx, matched_gt_idx, -1] < pred_bboxes[pred_bbox_idx, -1]: # noqa: E501 # yapf: disable
pred_bboxes_group[iof_idx, area_idx, matched_gt_idx] = pred_bboxes[pred_bbox_idx] # noqa: E501 # yapf: disable
fp_group = (tp_group <= 0).astype(float)
tps_list = []
fps_list = []
# concatenate tp, fp, and det-boxes which not matched group of
# gt boxes and tp_group, fp_group, and pred_bboxes_group which
# matched group of boxes respectively.
for i in range(num_iou):
tps, fps = [], []
for j in range(num_scale):
tps.append(
np.concatenate(
(tp[i, j][~match_group_of[i, j]], tp_group[i, j])))
fps.append(
np.concatenate(
(fp[i, j][~match_group_of[i, j]], fp_group[i, j])))
pred_bboxes = np.concatenate(
(pred_bboxes[~match_group_of[i, j]],
pred_bboxes_group[i, j])) # noqa: E501
tps_list.append(np.vstack(tps))
fps_list.append(np.vstack(fps))
tp = np.vstack(tps_list).reshape((num_iou, num_scale, -1))
fp = np.vstack(fps_list).reshape((num_iou, num_scale, -1))
return tp, fp, pred_bboxes
[docs] def get_class_gts(self, groundtruths: List[dict],
class_index: int) -> Tuple:
"""Get prediciton gt information of a certain class index.
This is an overridden method of :class:`VOCMeanAP`.
Args:
groundtruths (list[dict]): Same as ``self.add``.
class_index (int): Index of a specific class.
Returns:
List[np.ndarray]: gt bboxes.
"""
class_gts = []
class_group_of_flags = []
for gt in groundtruths:
gt_indices = (gt['labels'] == class_index)
gt_bboxes = gt['bboxes'][gt_indices, :]
group_of_flags = gt['is_group_ofs'][gt_indices]
class_gts.append(gt_bboxes)
class_group_of_flags.append(group_of_flags)
return class_gts, class_group_of_flags
[docs] def calculate_class_tpfp(self, predictions: List[dict],
groundtruths: List[dict], class_index: int,
pool: Optional[Pool]) -> Tuple:
"""Calculate the tp and fp of the given class index.
This is an overridden method of :class:`VOCMeanAP`.
Args:
predictions (List[dict]): A list of dict. Each dict is the
detection result of an image. Same as :class:`VOCMeanAP.add`.
groundtruths (List[dict]): A list of dict. Each dict is the
ground truth of an image. Same as :class:`VOCMeanAP.add`.
class_index (int): The class index.
pool (Pool, optional): An instance of
class:`multiprocessing.Pool`. If None, do not use
multiprocessing.
Returns:
tuple (tp, fp, num_gts):
- tp (numpy.ndarray): Shape (num_ious, num_scales, num_pred),
the true positive flag of each predict bbox.
- fp (numpy.ndarray): Shape (num_ious, num_scales, num_pred),
the false positive flag of each predict bbox.
- num_gts (numpy.ndarray): Shape (num_ious, num_scales), the
number of ground truths.
"""
class_preds = self.get_class_predictions(predictions, class_index)
class_gts, class_group_of_flags = self.get_class_gts(
groundtruths, class_index)
if pool is not None:
num_images = len(class_preds)
tpfp_list = pool.starmap(
self._calculate_image_tpfp,
zip(class_preds, class_gts, class_group_of_flags,
[self.use_group_of] * num_images,
[self.iof_thrs] * num_images, [self.iou_thrs] * num_images,
[self._area_ranges] * num_images,
[self.use_legacy_coordinate] * num_images))
else:
tpfp_list = []
for img_idx in range(len(class_preds)):
tp, fp, img_pred = self._calculate_image_tpfp(
class_preds[img_idx], class_gts[img_idx],
class_group_of_flags[img_idx], self.use_group_of,
self.iof_thrs, self.iou_thrs, self._area_ranges,
self.use_legacy_coordinate)
tpfp_list.append((tp, fp, img_pred))
image_tp_list, image_fp_list, class_preds = tuple(zip(*tpfp_list))
sorted_indices = np.argsort(-np.vstack(class_preds)[:, -1])
tp = np.concatenate(image_tp_list, axis=2)[..., sorted_indices]
fp = np.concatenate(image_fp_list, axis=2)[..., sorted_indices]
num_gts = np.zeros((self.num_iou, self.num_scale), dtype=int)
for idx, (min_area, max_area) in enumerate(self._area_ranges):
area_mask = filter_by_bboxes_area(
np.vstack(class_gts), min_area, max_area)
num_gts[:, idx] = np.sum(area_mask)
return tp, fp, num_gts