Source code for mmeval.metrics.pck_accuracy

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from collections import OrderedDict
from typing import Dict, List, Sequence, Union

from mmeval.core.base_metric import BaseMetric
from .utils import calc_distances, distance_acc


def keypoint_pck_accuracy(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray,
                          thr: float, norm_factor: np.ndarray) -> tuple:
    """Calculate the pose accuracy of PCK for each individual keypoint and the
    averaged accuracy across all keypoints for coordinates.

    Note:
        PCK metric measures accuracy of the localization of the body joints.
        The distances between predicted positions and the ground-truth ones
        are typically normalized by the bounding box size.
        The threshold (thr) of the normalized distance is commonly set
        as 0.05, 0.1 or 0.2 etc.

        - instance number: N
        - keypoint number: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation.
        norm_factor (np.ndarray[N, 2]): Normalization factor for H&W.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - acc (np.ndarray[K]): Accuracy of each keypoint.
        - avg_acc (float): Averaged accuracy across all keypoints.
        - cnt (int): Number of valid keypoints.
    """
    distances = calc_distances(pred, gt, mask, norm_factor)
    acc = np.array([distance_acc(d, thr) for d in distances])
    valid_acc = acc[acc >= 0]
    cnt = len(valid_acc)
    avg_acc = valid_acc.mean() if cnt > 0 else 0
    return acc, avg_acc, cnt


[docs]class PCKAccuracy(BaseMetric):
    """PCK accuracy evaluation metric, which is widely used in pose estimation.

    Calculate the pose accuracy of Percentage of Correct Keypoints (PCK) for
    each individual keypoint and the averaged accuracy across all keypoints.
    PCK metric measures the accuracy of the localization of the body joints.
    The distances between predicted positions and the ground-truth ones
    are typically normalized by the person bounding box size.
    The threshold (thr) of the normalized distance is commonly set
    as 0.05, 0.1 or 0.2 etc.

    Note:
        - length of dataset: N
        - num_keypoints: K
        - number of keypoint dimensions: D (typically D = 2)

    Args:
        thr(float): Threshold of PCK calculation. Defaults to 0.2.
        norm_item (str | Sequence[str]): The item used for normalization.
            Valid items include 'bbox', 'head', 'torso', which correspond
            to 'PCK', 'PCKh' and 'tPCK' respectively. Defaults to ``'bbox'``.
        **kwargs: Keyword parameters passed to :class:`BaseMetric`.

    Examples:

        >>> from mmeval import PCKAccuracy
        >>> import numpy as np
        >>> num_keypoints = 15
        >>> keypoints = np.random.random((1, num_keypoints, 2)) * 10
        >>> predictions = [{'coords': keypoints}]
        >>> keypoints_visible = np.ones((1, num_keypoints)).astype(bool)
        >>> bbox_size = np.random.random((1, 2)) * 10
        >>> groundtruths = [{
        ...     'coords': keypoints,
        ...     'mask': keypoints_visible,
        ...     'bbox_size': bbox_size,
        ... }]
        >>> pck_metric = PCKAccuracy(thr=0.5, norm_item='bbox')
        >>> pck_metric(predictions, groundtruths)
        OrderedDict([('PCK@0.5', 1.0)])
    """

    def __init__(self,
                 thr: float = 0.2,
                 norm_item: Union[str, Sequence[str]] = 'bbox',
                 **kwargs) -> None:
        super().__init__(**kwargs)
        self.thr = thr
        if isinstance(norm_item, str):
            norm_item = [norm_item]
        self.norm_item = norm_item

        allow_normalized_items = ['bbox', 'head', 'torso']
        for item in self.norm_item:
            if item not in allow_normalized_items:
                raise KeyError(
                    f'The normalized item {item} is not supported by '
                    f"{self.__class__.__name__}. Should be one of 'bbox', "
                    f"'head', 'torso', but got {item}.")

[docs]    def add(self, predictions: List[Dict], groundtruths: List[Dict]) -> None:  # type: ignore # yapf: disable # noqa: E501
        """Process one batch of predictions and groundtruths and add the
        intermediate results to `self._results`.

        Args:
            predictions (Sequence[dict]): Predictions from the model.
                Each prediction dict has the following keys:

                - coords (np.ndarray, [1, K, D]): predicted keypoints
                  coordinates

            groundtruths (Sequence[dict]): The ground truth labels.
                Each groundtruth dict has the following keys:

                - coords (np.ndarray, [1, K, D]): ground truth keypoints
                  coordinates
                - mask (np.ndarray, [1, K]): ground truth keypoints_visible
                - bbox_size (np.ndarray, optional, [1, 2]): ground truth
                  bbox size
                - head_size (np.ndarray, optional, [1, 2]): ground truth
                  head size
                - torso_size (np.ndarray, optional, [1, 2]): ground truth
                  torso size
        """
        for prediction, groundtruth in zip(predictions, groundtruths):
            self._results.append((prediction, groundtruth))

[docs]    def compute_metric(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics, and the values are the corresponding results.
        """
        # split gt and prediction list
        preds, gts = zip(*results)

        # pred_coords: [N, K, D]
        pred_coords = np.concatenate([pred['coords'] for pred in preds])
        # gt_coords: [N, K, D]
        gt_coords = np.concatenate([gt['coords'] for gt in gts])
        # mask: [N, K]
        mask = np.concatenate([gt['mask'] for gt in gts])

        metric_results: OrderedDict = OrderedDict()

        if 'bbox' in self.norm_item:
            norm_size_bbox = np.concatenate([gt['bbox_size'] for gt in gts])

            self.logger.info(f'Evaluating {self.__class__.__name__} '
                             f'(normalized by ``"bbox_size"``)...')

            _, pck, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask,
                                              self.thr, norm_size_bbox)
            metric_results[f'PCK@{self.thr}'] = pck

        if 'head' in self.norm_item:
            norm_size_head = np.concatenate([gt['head_size'] for gt in gts])

            self.logger.info(f'Evaluating {self.__class__.__name__} '
                             f'(normalized by ``"head_size"``)...')

            _, pckh, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask,
                                               self.thr, norm_size_head)
            metric_results[f'PCKh@{self.thr}'] = pckh

        if 'torso' in self.norm_item:
            norm_size_torso = np.concatenate([gt['torso_size'] for gt in gts])

            self.logger.info(f'Evaluating {self.__class__.__name__} '
                             f'(normalized by ``"torso_size"``)...')

            _, tpck, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask,
                                               self.thr, norm_size_torso)
            metric_results[f'tPCK@{self.thr}'] = tpck

        return metric_results


[docs]class MpiiPCKAccuracy(PCKAccuracy):
    """PCKh accuracy evaluation metric for MPII dataset.

    Calculate the pose accuracy of Percentage of Correct Keypoints (PCK) for
    each individual keypoint and the averaged accuracy across all keypoints.
    PCK metric measures accuracy of the localization of the body joints.
    The distances between predicted positions and the ground-truth ones
    are typically normalized by the person bounding box size.
    The threshold (thr) of the normalized distance is commonly set
    as 0.05, 0.1 or 0.2 etc.

    Note:
        - length of dataset: N
        - num_keypoints: K
        - number of keypoint dimensions: D (typically D = 2)

    Args:
        thr(float): Threshold of PCK calculation. Defaults to 0.5.
        norm_item (str | Sequence[str]): The item used for normalization.
            Valid items include 'bbox', 'head', 'torso', which correspond
            to 'PCK', 'PCKh' and 'tPCK' respectively. Defaults to ``'head'``.
        **kwargs: Keyword parameters passed to :class:`BaseMetric`.

    Examples:

        >>> from mmeval import MpiiPCKAccuracy
        >>> import numpy as np
        >>> num_keypoints = 16
        >>> keypoints = np.random.random((1, num_keypoints, 2)) * 10
        >>> predictions = [{'coords': keypoints}]
        >>> keypoints_visible = np.ones((1, num_keypoints)).astype(bool)
        >>> head_size = np.random.random((1, 2)) * 10
        >>> groundtruths = [{
        ...     'coords': keypoints + 1.0,
        ...     'mask': keypoints_visible,
        ...     'head_size': head_size,
        ... }]
        >>> mpii_pckh_metric = MpiiPCKAccuracy(thr=0.3, norm_item='head')
        >>> mpii_pckh_metric(predictions, groundtruths)
        OrderedDict([('Head', 100.0), ('Shoulder', 100.0), ('Elbow', 100.0),
        ('Wrist', 100.0), ('Hip', 100.0), ('Knee', 100.0), ('Ankle', 100.0),
        ('PCKh', 100.0), ('PCKh@0.1', 100.0)])
    """

    def __init__(self,
                 thr: float = 0.5,
                 norm_item: Union[str, Sequence[str]] = 'head',
                 **kwargs) -> None:
        super().__init__(thr=thr, norm_item=norm_item, **kwargs)

[docs]    def compute_metric(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics, and the values are corresponding results.
        """
        # split gt and prediction list
        preds, gts = zip(*results)

        # pred_coords: [N, K, D]
        pred_coords = np.concatenate([pred['coords'] for pred in preds])
        # gt_coords: [N, K, D]
        gt_coords = np.concatenate([gt['coords'] for gt in gts])
        # mask: [N, K]
        mask = np.concatenate([gt['mask'] for gt in gts])

        # MPII uses matlab format, gt index is 1-based,
        # convert 0-based index to 1-based index
        pred_coords = pred_coords + 1.0

        metric_results = super().compute_metric(results)

        if 'head' in self.norm_item:
            norm_size_head = np.concatenate([gt['head_size'] for gt in gts])

            self.logger.info(f'Evaluating {self.__class__.__name__} '
                             f'(normalized by ``"head_size"``)...')

            pck_p, _, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask,
                                                self.thr, norm_size_head)

            jnt_count = np.sum(mask, axis=0)
            PCKh = 100. * pck_p

            rng = np.arange(0, 0.5 + 0.01, 0.01)
            pckAll = np.zeros((len(rng), 16), dtype=np.float32)

            for r, threshold in enumerate(rng):
                _pck, _, _ = keypoint_pck_accuracy(pred_coords, gt_coords,
                                                   mask, threshold,
                                                   norm_size_head)
                pckAll[r, :] = 100. * _pck

            PCKh = np.ma.array(PCKh, mask=False)
            PCKh.mask[6:8] = True

            jnt_count = np.ma.array(jnt_count, mask=False)
            jnt_count.mask[6:8] = True
            jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)

            # dataset_joints_idx:
            #   head 9
            #   lsho 13  rsho 12
            #   lelb 14  relb 11
            #   lwri 15  rwri 10
            #   lhip 3   rhip 2
            #   lkne 4   rkne 1
            #   lank 5   rank 0
            stats = {
                'Head': PCKh[9],
                'Shoulder': 0.5 * (PCKh[13] + PCKh[12]),
                'Elbow': 0.5 * (PCKh[14] + PCKh[11]),
                'Wrist': 0.5 * (PCKh[15] + PCKh[10]),
                'Hip': 0.5 * (PCKh[3] + PCKh[2]),
                'Knee': 0.5 * (PCKh[4] + PCKh[1]),
                'Ankle': 0.5 * (PCKh[5] + PCKh[0]),
                'PCKh': np.sum(PCKh * jnt_ratio),
                'PCKh@0.1': np.sum(pckAll[10, :] * jnt_ratio)
            }

            del metric_results[f'PCKh@{self.thr}']
            for stats_name, stat in stats.items():
                metric_results[stats_name] = stat

        return metric_results


[docs]class JhmdbPCKAccuracy(PCKAccuracy):
    """PCK accuracy evaluation metric for Jhmdb dataset.

    Calculate the pose accuracy of Percentage of Correct Keypoints (PCK) for
    each individual keypoint and the averaged accuracy across all keypoints.
    PCK metric measures accuracy of the localization of the body joints.
    The distances between predicted positions and the ground-truth ones
    are typically normalized by the person bounding box size.
    The threshold (thr) of the normalized distance is commonly set
    as 0.05, 0.1 or 0.2 etc.

    Note:
        - length of dataset: N
        - num_keypoints: K
        - number of keypoint dimensions: D (typically D = 2)

    Args:
        thr(float): Threshold of PCK calculation. Defaults to 0.5.
        norm_item (str | Sequence[str]): The item used for normalization.
            Valid items include 'bbox', 'head', 'torso', which correspond
            to 'PCK', 'PCKh' and 'tPCK' respectively. Defaults to ``'bbox'``.
        **kwargs: Keyword parameters passed to :class:`BaseMetric`.

    Examples:

        >>> from mmeval import JhmdbPCKAccuracy
        >>> import numpy as np
        >>> num_keypoints = 15
        >>> keypoints = np.random.random((1, num_keypoints, 2)) * 10
        >>> predictions = [{'coords': keypoints}]
        >>> keypoints_visible = np.ones((1, num_keypoints)).astype(bool)
        >>> torso_size = np.random.random((1, 2)) * 10
        >>> groundtruths = [{
        ...     'coords': keypoints,
        ...     'mask': keypoints_visible,
        ...     'torso_size': torso_size,
        ... }]
        >>> jhmdb_pckh_metric = JhmdbPCKAccuracy(thr=0.2, norm_item='torso')
        >>> jhmdb_pckh_metric(predictions, groundtruths)
        OrderedDict([('Head tPCK', 1.0), ('Sho tPCK', 1.0), ('Elb tPCK', 1.0),
        ('Wri tPCK', 1.0), ('Hip tPCK', 1.0), ('Knee tPCK', 1.0),
        ('Ank tPCK', 1.0), ('Mean tPCK', 1.0)])
    """

    def __init__(self,
                 thr: float = 0.5,
                 norm_item: Union[str, Sequence[str]] = 'bbox',
                 **kwargs) -> None:
        super().__init__(thr=thr, norm_item=norm_item, **kwargs)

[docs]    def compute_metric(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics, and the values are corresponding results.
        """
        # split gt and prediction list
        preds, gts = zip(*results)

        # pred_coords: [N, K, D]
        pred_coords = np.concatenate([pred['coords'] for pred in preds])
        # gt_coords: [N, K, D]
        gt_coords = np.concatenate([gt['coords'] for gt in gts])
        # mask: [N, K]
        mask = np.concatenate([gt['mask'] for gt in gts])

        metric_results = super().compute_metric(results)

        if 'bbox' in self.norm_item:
            norm_size_bbox = np.concatenate([gt['bbox_size'] for gt in gts])

            self.logger.info(f'Evaluating {self.__class__.__name__} '
                             f'(normalized by ``"bbox_size"``)...')

            pck_p, pck, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask,
                                                  self.thr, norm_size_bbox)
            metric_results[f'PCK@{self.thr}'] = pck

            stats = {
                'Head': pck_p[2],
                'Sho': 0.5 * pck_p[3] + 0.5 * pck_p[4],
                'Elb': 0.5 * pck_p[7] + 0.5 * pck_p[8],
                'Wri': 0.5 * pck_p[11] + 0.5 * pck_p[12],
                'Hip': 0.5 * pck_p[5] + 0.5 * pck_p[6],
                'Knee': 0.5 * pck_p[9] + 0.5 * pck_p[10],
                'Ank': 0.5 * pck_p[13] + 0.5 * pck_p[14],
                'Mean': pck
            }

            for stats_name, stat in stats.items():
                metric_results[f'{stats_name} PCK'] = stat

        if 'torso' in self.norm_item:
            norm_size_torso = np.concatenate([gt['torso_size'] for gt in gts])

            self.logger.info(f'Evaluating {self.__class__.__name__} '
                             f'(normalized by ``"torso_size"``)...')

            pck_p, pck, _ = keypoint_pck_accuracy(pred_coords, gt_coords, mask,
                                                  self.thr, norm_size_torso)

            stats = {
                'Head': pck_p[2],
                'Sho': 0.5 * pck_p[3] + 0.5 * pck_p[4],
                'Elb': 0.5 * pck_p[7] + 0.5 * pck_p[8],
                'Wri': 0.5 * pck_p[11] + 0.5 * pck_p[12],
                'Hip': 0.5 * pck_p[5] + 0.5 * pck_p[6],
                'Knee': 0.5 * pck_p[9] + 0.5 * pck_p[10],
                'Ank': 0.5 * pck_p[13] + 0.5 * pck_p[14],
                'Mean': pck
            }

            del metric_results[f'tPCK@{self.thr}']
            for stats_name, stat in stats.items():
                metric_results[f'{stats_name} tPCK'] = stat

        return metric_results