error_utils.py 8.67 KB
Newer Older
1
#!/usr/bin/env python
Amir MOHAMMADI's avatar
nit  
Amir MOHAMMADI committed
2 3
# Ivana Chingovska <ivana.chingovska@idiap.ch>
# Fri Dec  7 12:33:37 CET 2012
4 5
"""Utility functions for computation of EPSC curve and related measurement"""

6 7 8 9 10 11 12 13
from bob.measure import (
    far_threshold,
    eer_threshold,
    min_hter_threshold,
    farfrr,
    frr_threshold,
)
from bob.bio.base.score.load import _iterate_csv_score_file
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
14 15
from collections import defaultdict
import re
16 17 18 19
import numpy
import logging

logger = logging.getLogger(__name__)
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
20 21


Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
22
def _parse_inverse_rate(method, prefix):
    """Returns ``1 / N`` for a criterion written as ``prefix`` followed by ``N``.

    For example ``_parse_inverse_rate("bpcer20", "bpcer")`` returns ``0.05``.

    Raises
    ------
    ValueError
        If what remains of ``method`` after removing ``prefix`` is not a non-zero
        number.
    """
    denominator = method.replace(prefix, "")
    try:
        return 1 / float(denominator)
    except (ValueError, ZeroDivisionError) as exc:
        raise ValueError(
            "Invalid threshold criteria: {!r}. Expected `{}' followed by a "
            "non-zero number, e.g. `{}20'.".format(method, prefix, prefix)
        ) from exc


def calc_threshold(method, pos, negs, all_negs, far_value=None, is_sorted=False):
    """Calculates the threshold based on the given method.

    Parameters
    ----------
    method : str
        The criterion (case insensitive). One of:

        * ``bpcerN`` (e.g. ``bpcer20``): the threshold is computed with
          :any:`apcer_threshold` for a desired APCER of ``1 / N``.
        * ``apcerN`` (e.g. ``apcer20``): the threshold is computed with
          ``frr_threshold`` on ``all_negs`` and ``pos`` for a desired BPCER of
          ``1 / N``.
        * ``far``: the threshold is computed with ``far_threshold`` on ``all_negs``
          and ``pos`` using ``far_value``.
        * ``eer``: the threshold is computed with ``eer_threshold``.
        * ``min-hter``: the threshold is computed with ``min_hter_threshold``.
    pos : array_like
        The positive scores. Pass ``is_sorted=True`` only if they are already sorted.
    negs : list
        A list of array_like negative scores. Each item in the list corresponds to
        scores of one PAI. Only used by the ``bpcerN`` criterion.
    all_negs : array_like
        An array of all negative scores. This can be calculated from negs as well but
        we ask for it since you might have it already calculated.
    far_value : float, optional
        Required when method is ``far``; ignored by the other criteria.
    is_sorted : bool, optional
        If True, it means all scores are sorted and no sorting will happen. The flag
        is forwarded unchanged to the underlying threshold function.

    Returns
    -------
    float
        The calculated threshold.

    Raises
    ------
    ValueError
        If method is unknown, if the number after ``bpcer``/``apcer`` is missing,
        malformed or zero, or if method is ``far`` and ``far_value`` is not given.
    """
    method = method.lower()
    # NOTE: "bpcer" must be tested before "apcer"; the order of the original
    # checks is kept on purpose.
    if "bpcer" in method:
        desired_apcer = _parse_inverse_rate(method, "bpcer")
        threshold = apcer_threshold(desired_apcer, pos, *negs, is_sorted=is_sorted)
    elif "apcer" in method:
        desired_bpcer = _parse_inverse_rate(method, "apcer")
        threshold = frr_threshold(all_negs, pos, desired_bpcer, is_sorted=is_sorted)
    elif method == "far":
        if far_value is None:
            # Fail early with a clear message instead of forwarding None.
            raise ValueError("far_value must be given when method is `far'.")
        threshold = far_threshold(all_negs, pos, far_value, is_sorted=is_sorted)
    elif method == "eer":
        threshold = eer_threshold(all_negs, pos, is_sorted=is_sorted)
    elif method == "min-hter":
        threshold = min_hter_threshold(all_negs, pos, is_sorted=is_sorted)
    else:
        raise ValueError("Unknown threshold criteria: {}".format(method))

    return threshold
69 70


Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
def apcer_threshold(desired_apcer, pos, *negs, is_sorted=False):
    """Finds the decision threshold that meets a desired APCER.

    One threshold is computed per PAI by calling ``far_threshold`` on that PAI's
    negative scores together with ``pos``; the largest of those thresholds is
    returned. Since APCER is the max of all APCER_PAI values, the intent is that the
    real APCER is **at most** the desired value.

    Parameters
    ----------
    desired_apcer : float
        The desired APCER value.
    pos : list
        An array or list of positive scores in float.
    *negs
        Negative scores, one array or list per PAI.
    is_sorted : bool, optional
        Set to ``True`` if ALL arrays (pos and negs) are sorted. Forwarded to
        ``far_threshold`` unchanged.

    Returns
    -------
    float
        The largest of the per-PAI thresholds.
    """
    per_pai_thresholds = [
        far_threshold(pai_scores, pos, desired_apcer, is_sorted=is_sorted)
        for pai_scores in negs
    ]
    return max(per_pai_thresholds)


def apcer_bpcer(threshold, pos, *negs):
    """Computes APCER_PAI, APCER, and BPCER given the positive scores and a list of
    negative scores and a threshold.

    Parameters
    ----------
    threshold : float
        The threshold to be used to compute the error rates.
    pos : list
        An array or list of positive scores in float.
    *negs
        A list of negative scores. Each item corresponds to the negative scores of one
        PAI. At least one item is required.

    Returns
    -------
    tuple
        A tuple such as (list of APCER_PAI, APCER, BPCER), where APCER is the maximum
        of the APCER_PAI values.

    Raises
    ------
    ValueError
        If no negative scores are given.
    """
    # Explicit check instead of ``assert``: asserts are stripped under ``python -O``,
    # which would leave ``frr`` unbound below.
    if not negs:
        raise ValueError(
            "At least one set of negative scores (one per PAI) is required."
        )

    apcers = []
    for neg in negs:
        far, frr = farfrr(neg, pos, threshold)
        apcers.append(far)
    bpcer = frr  # bpcer will be the same in all cases
    return apcers, max(apcers), bpcer


129
def split_csv_pad_per_pai(filename, regexps=None, regexp_column="attack_type"):
    """Returns scores for Bona-Fide samples and scores for each PAI.
    By default, the ``attack_type`` column is used as indication for each
    Presentation Attack Instrument (PAI).

    For example, with default regexps and regexp_column, if you have scores like:
        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print,       0.6
        001,        print_sample_2_path,     False,       print,       0.6
        001,        replay_sample_1_path,    False,       replay,      0.2
        001,        replay_sample_2_path,    False,       replay,      0.2
        001,        mask_sample_1_path,      False,       mask,        0.5
        001,        mask_sample_2_path,      False,       mask,        0.5
    this function will return 1 set of positive scores, and 3 sets of negative scores
    (for each print, replay, and mask PAIs).

    Otherwise, you can provide a list of regular expressions that match each PAI.
    For example, with regexps as ['print', 'replay', 'mask'], if you have scores like:
        claimed_id, test_label,              is_bonafide, attack_type, score
        001,        bona_fide_sample_1_path, True,        ,            0.9
        001,        print_sample_1_path,     False,       print/1,     0.6
        001,        print_sample_2_path,     False,       print/2,     0.6
        001,        replay_sample_1_path,    False,       replay/1,    0.2
        001,        replay_sample_2_path,    False,       replay/2,    0.2
        001,        mask_sample_1_path,      False,       mask/1,      0.5
        001,        mask_sample_2_path,      False,       mask/2,      0.5
    the function will return 3 sets of negative scores (for print, replay, and mask
    PAIs, given in regexp).


    Parameters
    ----------
    filename : str
        Path to the score file.
    regexps : List of str, optional
        A list of regular expressions that match each PAI. If not given (``None`` or
        empty), the values in the column pointed by regexp_column are used to find
        scores for different PAIs. Patterns are tried in order with ``re.match`` (i.e.
        anchored at the start of the value) and the first match wins.
    regexp_column : str, optional
        The column whose values identify the PAI. If a list of regular expressions is
        given, those patterns will be matched against the values in this column.
        default: ``attack_type``

    Returns
    -------
    tuple (positives, {'pai_name': negatives})
        A tuple containing pos scores and a dict of negative scores mapping PAIs to
        their scores. The dict is a ``defaultdict(list)``; when regexps are given its
        keys are the pattern strings, otherwise the raw values of regexp_column.
        Scores are stored as yielded by the CSV reader, without further conversion.

    Raises
    ------
    ValueError
        If none of the given regular expressions match a value in regexp_column.
    KeyError
        If regexp_column is not a column of the CSV file.
    """
    pos = []
    negs = defaultdict(list)
    # Compile once, outside the row loop. ``None`` replaces the former mutable
    # default ``[]``; both mean "group by the raw column value".
    patterns = [re.compile(pattern) for pattern in regexps] if regexps else []

    for row in _iterate_csv_score_file(filename):
        # if it is a Bona-Fide score
        if row["is_bonafide"].lower() == "true":
            pos.append(row["score"])
            continue

        pai_value = row[regexp_column]
        if not patterns:
            negs[pai_value].append(row["score"])
            continue

        # regexps were given and this is not a Bona-Fide score
        for pattern in patterns:
            if pattern.match(pai_value):
                negs[pattern.pattern].append(row["score"])
                break
        else:  # this else is for the for loop: ``for pattern in patterns:``
            raise ValueError(
                f"No regexps: {patterns} match `{pai_value}' "
                f"from `{regexp_column}' column."
            )
    return pos, negs
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228


def split_csv_pad(filename):
    """Loads PAD scores from a CSV score file, splits them by attack vs bonafide.

    The CSV must contain a ``is_bonafide`` column with each field either
    ``True`` or ``False`` (case insensitive). Any value other than ``true`` (after
    lower-casing) is treated as an attack.

    Parameters
    ----------
    filename: str
        The path to a CSV file containing all the scores.

    Returns
    -------
    (attack, bonafide): Tuple of 1D-arrays
        The negative (attacks) and positives (bonafide) scores, as ``numpy.float64``
        arrays. An array is empty when the file has no score of that kind.
    """
    logger.debug("Loading CSV score file: '%s'", filename)
    attack_scores = []
    bonafide_scores = []
    for row in _iterate_csv_score_file(filename):
        if row["is_bonafide"].lower() == "true":
            bonafide_scores.append(row["score"])
        else:
            attack_scores.append(row["score"])
    logger.debug(
        "Found %d negative (attack), and %d positive (bonafide) scores.",
        len(attack_scores),
        len(bonafide_scores),
    )
    # Cast the scores to numpy float. Both arrays are always built explicitly, so
    # the return type does not depend on which kinds of rows the file contained.
    return (
        numpy.array(attack_scores, dtype=numpy.float64),
        numpy.array(bonafide_scores, dtype=numpy.float64),
    )