### Implemented false_alarm_rate and detection_identification_rate and added documentation

parent 8599cfac
 ... ... @@ -79,7 +79,7 @@ def relevance (input, machine): return retval def recognition_rate(cmc_scores, rank = 1, threshold=None): def recognition_rate(cmc_scores, threshold = None, rank = 1): """recognition_rate(cmc_scores, rank, threshold) -> RR Calculates the recognition rate from the given input, which is identical ... ... @@ -103,6 +103,10 @@ def recognition_rate(cmc_scores, rank = 1, threshold=None): For open set recognition, i.e., when there exist a tuple including negative scores without corresponding positive scores (``None``), and **all** negative scores are below ``threshold`` :math:`\\max\\{S_p^+\\} < \\theta`, the probe item is correctly rejected, **and it does not count into the denominator** :math:`P`. When no ``threshold`` is provided, the open set probes will **always** count as misclassified, regardless of the ``rank``. .. warn: For open set tests, this rate does not correspond to a standard rate. Please use :py:func:`detection_identification_rate` and :py:func:`false_alarm_rate` instead. **Parameters:** ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))] ... ... @@ -110,13 +114,13 @@ def recognition_rate(cmc_scores, rank = 1, threshold=None): Each pair contains the ``negative`` and the ``positive`` scores for **one probe item**. Each pair can contain up to one empty array (or ``None``), i.e., in case of open set recognition. ``rank`` : int or ``None`` The rank for which the recognition rate should be computed, 1 by default. ``threshold`` : float or ``None`` Decision threshold. If not ``None``, **all** scores will be filtered by the threshold. In an open set recognition problem, all open set scores (negatives with no corresponding positive) for which all scores are below threshold, will be counted as correctly rejected and **removed** from the probe list (i.e., the denominator). ``rank`` : int or ``None`` The rank for which the recognition rate should be computed, 1 by default. **Returns:** ``RR`` : float ... ... 
@@ -159,7 +163,7 @@ def recognition_rate(cmc_scores, rank = 1, threshold=None): # we have a positive, so we need to count the probe counter += 1 if not pos.size: if not numpy.array(pos).size: # all positive scores have been filtered out by the threshold, we definitely have a mis-match continue ... ... @@ -167,7 +171,7 @@ def recognition_rate(cmc_scores, rank = 1, threshold=None): # (usually, there is only one positive score, but just in case...) max_pos = numpy.max(pos) if neg is None or not neg.size: if neg is None or not numpy.array(neg).size: # if we had no negatives, or all negatives were below threshold, we have a match at rank 1 correct += 1 else: ... ... @@ -179,31 +183,32 @@ def recognition_rate(cmc_scores, rank = 1, threshold=None): return float(correct) / float(counter) def cmc(cmc_scores, threshold = None): def cmc(cmc_scores): """cmc(cmc_scores) -> curve Calculates the cumulative match characteristic (CMC) from the given input. The input has a specific format, which is a list of two-element tuples. Each of the tuples contains the negative and the positive scores for one test of the tuples contains the negative and the positive scores for one probe item. To read the lists from score files in 4 or 5 column format, please use the :py:func:`bob.measure.load.cmc_four_column` or :py:func:`bob.measure.load.cmc_five_column` function. For each test item the probability that the rank r of the positive score is For each probe item the probability that the rank :math:`r` of the positive score is calculated. The rank is computed as the number of negative scores that are higher than the positive score. If several positive scores for one test item exist, the **highest** positive score is taken. The CMC finally computes how many test items have rank r or higher, divided by the total number of test values. .. note:: The CMC is not available for open set classification. Please use the :py:func:`detection_identification_rate` and :py:func:`false_alarm_rate` instead. 
**Parameters:** ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))] A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database ``threshold`` : float or ``None`` Decision threshold. If ``None``, the decision threshold will be the **highest** positive score. **Returns:** ``curve`` : array_like(2D, float) ... ... @@ -216,11 +221,13 @@ def cmc(cmc_scores, threshold = None): raise ValueError("The given set of scores is empty") # compute MC match_characteristic = numpy.zeros((max([len(neg) for (neg,pos) in cmc_scores])+1,), numpy.int) match_characteristic = numpy.zeros((max([len(neg) for neg, _ in cmc_scores if neg is not None])+1,), numpy.int) for neg, pos in cmc_scores: if((type(pos)!=float) and (len(pos) == 0)): if pos is None or not numpy.array(pos).size: raise ValueError("For the CMC computation at least one positive score per pair is necessary.") if neg is None: neg = [] # get the maximum positive score for the current probe item # (usually, there is only one positive score, but just in case...) ... ... @@ -228,17 +235,110 @@ def cmc(cmc_scores, threshold = None): # count the number of negative scores that are higher than the best positive score index = numpy.sum(neg >= max_pos) if threshold is None or threshold <= max_pos: match_characteristic[index] += 1 match_characteristic[index] += 1 # cumulate cumulative_match_characteristic = numpy.ndarray(match_characteristic.shape, numpy.float64) count = 0. for i in range(match_characteristic.shape): count += match_characteristic[i] cumulative_match_characteristic[i] = count / probe_count cumulative_match_characteristic = numpy.cumsum(match_characteristic, dtype=numpy.float64) return cumulative_match_characteristic / probe_count def detection_identification_rate(cmc_scores, threshold, rank = 1): """detection_identification_rate(cmc_scores, threshold, rank) -> dir Computes the `detection and identification rate` for the given threshold. 
This value is designed to be used in an open set identification protocol, and defined in Chapter 14.1 of [LiJain2005]_. Although the detection and identification rate is designed to be computed on an open set protocol, it uses only the probe elements, for which a corresponding gallery element exists. For closed set identification protocols, this function is identical to :py:func:`recognition_rate`. The only difference is that for this function, a ``threshold`` for the scores needs to be defined, while for :py:func:`recognition_rate` it is optional. **Parameters:** ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))] CMC scores loaded with one of the functions (:py:func:`bob.measure.load.cmc_four_column` or :py:func:`bob.measure.load.cmc_five_column`). Each pair contains the ``negative`` and the ``positive`` scores for **one probe item**. There needs to be at least one probe item, for which positive and negative scores exist. ``threshold`` : float The decision threshold :math:`\\tau`. ``rank`` : int The rank for which the rate should be computed, by default 1. **Returns:** ``dir`` : float The detection and identification rate for the given threshold. """ # count the correctly classified probes correct = 0 counter = 0 for neg, pos in cmc_scores: if pos is None or not numpy.array(pos).size: # we only consider probes with corresponding gallery items continue # we have an in-gallery probe counter += 1 # check, if it is correctly classified if neg is None: neg = [] # get the maximum positive score for the current probe item # (usually, there is only one positive score, but just in case...) max_pos = numpy.max(pos) index = numpy.sum(neg >= max_pos) # compute the rank (in fact, rank - 1) if max_pos >= threshold and index < rank: correct += 1 if not counter: logger.warn("No in-gallery probe was found") return 0. 
return float(correct) / float(counter) def false_alarm_rate(cmc_scores, threshold): """false_alarm_rate(cmc_scores, threshold) -> far Computes the `false alarm rate` for the given threshold. This value is designed to be used in an open set identification protocol, and defined in Chapter 14.1 of [LiJain2005]_. The false alarm rate is designed to be computed on an open set protocol, it uses only the probe elements, for which **no** corresponding gallery element exists. **Parameters:** ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))] CMC scores loaded with one of the functions (:py:func:`bob.measure.load.cmc_four_column` or :py:func:`bob.measure.load.cmc_five_column`). Each pair contains the ``negative`` and the ``positive`` scores for **one probe item**. There needs to be at least one probe item, for which only negative scores exist. ``threshold`` : float The decision threshold :math:`\\tau`. **Returns:** ``far`` : float The false alarm rate. """ incorrect = 0 counter = 0 for neg, pos in cmc_scores: # we only consider the out-of-gallery probes, i.e., with no positive scores if pos is None or not numpy.array(pos).size: counter += 1 # check if the probe is above threshold if neg is None or not numpy.array(neg).size: raise ValueError("One pair of the CMC scores has neither positive nor negative values") if numpy.max(neg) >= threshold: incorrect += 1 if not counter: logger.warn("No out-of-gallery probe was found") return 0. return cumulative_match_characteristic return float(incorrect) / float(counter) def get_config(): ... ...
 ... ... @@ -407,20 +407,26 @@ def cmc(cmc_scores, logx = True, **kwargs): return len(out) def detection_identification_rate(cmc_scores, far_values = log_values(), rank = None, logx = True, **kwargs): """Plots the Detection & Identification rate curve over the FAR for the given FAR values. def detection_identification_curve(cmc_scores, far_values = log_values(), rank = 1, logx = True, **kwargs): """Plots the Detection & Identification curve over the FAR for the given FAR values. This curve is designed to be used in an open set identification protocol, and defined in Chapter 14.1 of [LiJain2005]_. It requires to have at least one open set probe item, i.e., with no corresponding gallery, such that the positives for that pair are ``None``. The detection and identification curve first computes FAR thresholds based on the out-of-set probe scores (negative scores). For each probe item, the **maximum** negative score is used. Then, it plots the detection and identification rates for those thresholds, which are based on the in-set probe scores only. See [LiJain2005]_ for more details. **Parameters:** ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))] See :py:func:`bob.measure.cmc` See :py:func:`bob.measure.detection_identification_rate` ``far_values`` : [float] The values for the FAR, where the CAR should be plotted; each value should be in range [0,1]. ``rank`` : int or ``None`` The rank for which the curve should be plotted. If ``None``, rank 1 is assumed. The rank for which the curve should be plotted, 1 by default. ``logx`` : bool Plot the FAR axis in logarithmic scale using :py:func:`matplotlib.pyplot.semilogx` or in linear scale using :py:func:`matplotlib.pyplot.plot`? (Default: ``True``) ... ... @@ -435,17 +441,20 @@ def detection_identification_rate(cmc_scores, far_values = log_values(), rank = .. [LiJain2005] **Stan Li and Anil K. Jain**, *Handbook of Face Recognition*, Springer, 2005 """ import numpy from matplotlib import pyplot from . 
import far_threshold, recognition_rate from . import far_threshold, detection_identification_rate # get all negative scores and sort them to compute the FAR thresholds negatives = sorted(n for neg,pos in cmc_scores for n in neg) # for each probe, for which no positives exists, get the highest negative score; and sort them to compute the FAR thresholds negatives = sorted(max(neg) for neg,pos in cmc_scores if (pos is None or not numpy.array(pos).size) and neg is not None) if not negatives: raise ValueError("There need to be at least one pair with only negative scores") # compute thresholds based on FAR values thresholds = [far_threshold(negatives, [], v, True) for v in far_values] # compute recognition rates based on threshold for the given rank rates = [100.*recognition_rate(cmc_scores, rank, t) for t in thresholds] # compute detection and identification rate based on the thresholds for the given rank rates = [100.*detection_identification_rate(cmc_scores, t, rank) for t in thresholds] # plot curve if logx: ... ...
 ... ... @@ -85,7 +85,7 @@ def main(command_line_options = None): mpl.xticks(ticks, ticks) mpl.xlim([1, max_rank]) else: plot.detection_identification_rate(data, rank = args.rank, color=(0,0,1), linestyle='--', dashes=(6,2), logx = args.log_x_scale) plot.detection_identification_curve(data, rank = args.rank, color=(0,0,1), linestyle='--', dashes=(6,2), logx = args.log_x_scale) mpl.title("Detection & Identification Curve") if args.log_x_scale: mpl.xlabel('False Acceptance Rate (log) in %') ... ...
 ... ... @@ -268,11 +268,9 @@ def test_cmc(): from . import recognition_rate, cmc, load def n(*args): return numpy.array(args) # tests the CMC calculation # test data; should give match characteristics [1/2,1/4,1/3] and CMC [1/3,2/3,1] test_data = [(n(0.3, 1.1, 0.5), n(0.7)), (n(1.4, -1.3, 0.6), n(0.2)), (n(0.8, 0., 1.5), n(-0.8, 1.8)), (n(2., 1.3, 1.6, 0.9), n(2.4))] test_data = [((0.3, 1.1, 0.5), (0.7,)), ((1.4, -1.3, 0.6), (0.2,)), ((0.8, 0., 1.5), (-0.8, 1.8)), ((2., 1.3, 1.6, 0.9), (2.4,))] # compute recognition rate rr = recognition_rate(test_data) nose.tools.eq_(rr, 0.5) ... ... @@ -326,30 +324,29 @@ def test_calibration(): def test_open_set_recognition_rate(): far_value = 0.01 def test_open_set_rates(): # No error files cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set.txt")) normal_scores = bob.measure.load.split_four_column(F("scores-cmc-4col-open-set.txt")) assert abs(bob.measure.recognition_rate(cmc_scores) - 1.0) < 1e-8 assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores) - 7./9.) 
< 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8 t = bob.measure.far_threshold(normal_scores, normal_scores,far_value) assert abs(bob.measure.recognition_rate(cmc_scores, threshold=t) - 1.0) < 1e-8 # One error cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set-one-error.txt")) normal_scores = bob.measure.load.split_four_column(F("scores-cmc-4col-open-set-one-error.txt")) assert abs(bob.measure.recognition_rate(cmc_scores) - 0.857142857143) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 0.857142857143) < 1e-8 t = bob.measure.far_threshold(normal_scores, normal_scores,far_value) assert abs(bob.measure.recognition_rate(cmc_scores, threshold=t) - 0.857142857143) < 1e-8 assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores) - 6./9.) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8 # Two errors cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set-two-errors.txt")) normal_scores = bob.measure.load.split_four_column(F("scores-cmc-4col-open-set-two-errors.txt")) assert abs(bob.measure.recognition_rate(cmc_scores) - 0.857142857143) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 0.857142857143) < 1e-8 t = bob.measure.far_threshold(normal_scores, normal_scores,far_value) assert abs(bob.measure.recognition_rate(cmc_scores, threshold=t)) < 1e-8 assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5) - 0.5) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores) - 6./9.) < 1e-8 assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 6./8.) < 1e-8
 ... ... @@ -25,6 +25,8 @@ TEST_SCORES_5COL = F('test-5col.txt') SCORES_4COL_CMC = F('scores-cmc-4col.txt') SCORES_5COL_CMC = F('scores-cmc-5col.txt') SCORES_4COL_CMC_OS = F('scores-cmc-4col-open-set.txt') def test_compute_perf(): # sanity checks ... ... @@ -68,8 +70,9 @@ def test_compute_cmc(): # sanity checks assert os.path.exists(SCORES_4COL_CMC) assert os.path.exists(SCORES_5COL_CMC) assert os.path.exists(SCORES_4COL_CMC_OS) from .script.plot_cmc import main nose.tools.eq_(main(['--self-test', '--score-file', SCORES_4COL_CMC, '--log-x-scale']), 0) nose.tools.eq_(main(['--self-test', '--score-file', SCORES_5COL_CMC, '--parser', '5column']), 0) nose.tools.eq_(main(['--self-test', '--score-file', SCORES_4COL_CMC, '--rank', '1']), 0) nose.tools.eq_(main(['--self-test', '--score-file', SCORES_4COL_CMC_OS, '--rank', '1']), 0)
 ... ... @@ -19,7 +19,8 @@ Methods in the :py:mod:`bob.measure` module can help you to quickly and easily evaluate error for multi-class or binary classification problems. If you are not yet familiarized with aspects of performance evaluation, we recommend the following papers for an overview of some of the methods implemented. following papers and book chapters for an overview of some of the implemented methods. * Bengio, S., Keller, M., Mariéthoz, J. (2004). `The Expected Performance Curve`_. International Conference on Machine Learning ICML Workshop on ROC ... ... @@ -27,6 +28,8 @@ following papers for an overview of some of the methods implemented. * Martin, A., Doddington, G., Kamm, T., Ordowski, M., & Przybocki, M. (1997). `The DET curve in assessment of detection task performance`_. Fifth European Conference on Speech Communication and Technology (pp. 1895-1898). * Li, S., Jain, A.K. (2005), `Handbook of Face Recognition`, Chapter 14, Springer Overview -------- ... ... @@ -105,8 +108,8 @@ defined in the first equation. loaded your scores in two 1D float64 vectors and are ready to evaluate the performance of the classifier. Evaluation ---------- Verification ------------ To count the number of correctly classified positives and negatives you can use the following techniques: ... ... @@ -171,6 +174,43 @@ calculation of the threshold: >>> t = bob.measure.min_weighted_error_rate_threshold(negatives, positives, cost, is_sorted = True) >>> assert T == t Identification -------------- For identification, the Recognition Rate is one of the standard measures. To compute recognition rates, you can use the :py:func:`bob.measure.recognition_rate` function. This function expects a relatively complex data structure, which is the same as for the `CMC`_ below. For each probe item, the scores for negative and positive comparisons are computed, and collected for all probe items: .. doctest:: >>> rr_scores = [] >>> for probe in range(10): ... 
pos = numpy.random.normal(1, 1, 1) ... neg = numpy.random.normal(0, 1, 19) ... rr_scores.append((neg, pos)) >>> bob.measure.recognition_rate(rr_scores, rank=1) 0.3 For open set identification, according to Li and Jain (2005) there are two different error measures defined. The first measure is the :py:func:`bob.measure.detection_identification_rate`, which counts the number of correctly classified in-gallery probe items. The second measure is the :py:func:`bob.measure.false_alarm_rate`, which counts how often an out-of-gallery probe item was incorrectly accepted. Both rates can be computed using the same data structure, with one exception. Both functions require that at least one probe item exists, which has no corresponding gallery item, i.e., where the positives are empty or ``None``: (continued from above...) .. doctest:: >>> for probe in range(10): ... pos = None ... neg = numpy.random.normal(-2, 1, 10) ... rr_scores.append((neg, pos)) >>> bob.measure.detection_identification_rate(rr_scores, threshold = 0, rank=1) 0.3 >>> bob.measure.false_alarm_rate(rr_scores, threshold = 0) 0.2 Plotting -------- ... ... @@ -353,8 +393,8 @@ Detection & Identification Curve ================================ The detection & identification curve is designed to evaluate open set identification tasks. It can be plotted using the :py:func:`bob.measure.plot.detection_identification_rate` function. Here, we plot the detection & identification curve for rank 1, so that the recognition rate for FAR=1 will be identical to the rank one recognition rate obtained in the CMC plot above. It can be plotted using the :py:func:`bob.measure.plot.detection_identification_curve` function, but it requires at least one open-set probe, i.e., where no corresponding positive score exists, for which the FAR values are computed. 
Here, we plot the detection and identification curve for rank 1, so that the recognition rate for FAR=1 will be identical to the rank one :py:func:`bob.measure.recognition_rate` obtained in the CMC plot above. .. plot:: ... ... @@ -368,8 +408,12 @@ Here, we plot the detection & identification curve for rank 1, so that the recog positives = numpy.random.normal(1, 1, 1) negatives = numpy.random.normal(0, 1, 19) cmc_scores.append((negatives, positives)) bob.measure.plot.detection_identification_rate(cmc_scores, rank=1, logx=True) pyplot.xlabel('FAR') for probe in range(10): negatives = numpy.random.normal(-1, 1, 10) cmc_scores.append((negatives, None)) bob.measure.plot.detection_identification_curve(cmc_scores, rank=1, logx=True) pyplot.xlabel('False Alarm Rate') pyplot.ylabel('Detection & Identification Rate (%)') pyplot.ylim([0,100]) ... ...
 ... ... @@ -28,6 +28,8 @@ Single point measurements bob.measure.f_score bob.measure.precision_recall bob.measure.recognition_rate bob.measure.detection_identification_rate bob.measure.false_alarm_rate bob.measure.eer_rocch Thresholds ... ... @@ -92,7 +94,7 @@ Plotting bob.measure.plot.epc bob.measure.plot.precision_recall_curve bob.measure.plot.cmc bob.measure.plot.detection_identification_rate bob.measure.plot.detection_identification_curve OpenBR conversions ------------------ ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!