Commit 5b69b076 by Manuel Günther

### Implemented RR with threshold correctly; changed load.cmc_*_column to allow empty (None) positive and negative scores

`Implemented RR with threshold correctly; changed load.cmc_*_column to allow empty (None) positive and negative scores`
parent 9c5ef6c4
 ... ... @@ -124,20 +124,46 @@ def recognition_rate(cmc_scores, rank = None, threshold=None): rank = 1 correct = 0 counter = 0 for neg, pos in cmc_scores: if((type(pos)!=float) and (len(pos) == 0)): raise ValueError("For the CMC computation at least one positive score per pair is necessary.") if pos is None and neg is None: raise ValueError("One pair of the CMC scores has neither positive nor negative values") # filter out any negative or positive scores below threshold if threshold is not None and neg is not None: neg = numpy.array(neg[neg >= threshold]) if pos is None: # no positives, so we definitely do not have a match; # check if we have negatives above threshold if not neg.ndim: # we have no negative scores over the threshold, so we have correctly rejected the probe # don't increase any of the two counters... continue # we have negatives over threshold, so we have incorrect classifications; independent on the actual rank counter += 1 else: # we have a positive, so we need to count the probe counter += 1 # get the maximum positive score for the current probe item # (usually, there is only one positive score, but just in case...) max_pos = numpy.max(pos) if threshold is not None and max_pos < threshold: # we have filtered out all positives, so any match is incorrect continue if neg is None or not neg.ndim: # if we had no negatives, or all negatives were below threshold, we have a match at rank 1 correct += 1 else: # count the number of negative scores that are higher than the best positive score index = numpy.sum(neg >= max_pos) if index < rank and (threshold is None or threshold <= max_pos): if index < rank and (threshold is None or max_pos >= threshold): correct += 1 return correct / float(len(cmc_scores)) return float(correct) / float(counter) def cmc(cmc_scores, threshold = None): ... ...
 ... ... @@ -10,6 +10,9 @@ import numpy import tarfile import os import logging logger = logging.getLogger('bob.measure') def open_file(filename, mode='rt'): """open_file(filename) -> file_like ... ... @@ -122,7 +125,6 @@ def split_four_column(filename): def cmc_four_column(filename): """cmc_four_column(filename) -> cmc_scores Loads scores to compute CMC curves from a file in four column format. The four column file needs to be in the same format as described in :py:func:`four_column`, and the ``test_label`` (column 3) has to contain the test/probe file name or a probe id. ... ... @@ -141,47 +143,28 @@ def cmc_four_column(filename): **Returns:** ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))] A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database ``cmc_scores`` : [(negatives, positives)] A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database. Both ``negatives`` and ``positives`` can be either an 1D :py:class:`numpy.ndarray` of type ``float``, or ``None``. 
""" # extract positives and negatives pos_dict = {} neg_dict = {} # read four column list for (client_id, probe_id, probe_name, score_str) in four_column(filename): try: score = float(score_str) for (client_id, probe_id, probe_name, score) in four_column(filename): # check in which dict we have to put the score if client_id == probe_id: correct_dict = pos_dict else: correct_dict = neg_dict correct_dict = pos_dict if client_id == probe_id else neg_dict # append score if probe_name in correct_dict: correct_dict[probe_name].append(score) else: correct_dict[probe_name] = [score] except: raise SyntaxError("Cannot convert score '%s' to float" % score_str) # convert to lists of tuples of ndarrays retval = [] import logging logger = logging.getLogger('bob') for probe_name in sorted(pos_dict.keys()): if probe_name in neg_dict: retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64))) else: logger.warn('For probe name "%s" there are only positive scores. This probe name is ignored.' % probe_name) #test if there are probes for which only negatives exist for probe_name in sorted(neg_dict.keys()): if not probe_name in pos_dict.keys(): logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name) # convert that into the desired format return _convert_cmc_scores(neg_dict, pos_dict) return retval def five_column(filename): ... ... 
@@ -286,35 +269,19 @@ def cmc_five_column(filename): # read four column list for (client_id, _, probe_id, probe_name, score) in five_column(filename): # check in which dict we have to put the score if client_id == probe_id: correct_dict = pos_dict else: correct_dict = neg_dict correct_dict = pos_dict if client_id == probe_id else neg_dict # append score if probe_name in correct_dict: correct_dict[probe_name].append(score) else: correct_dict[probe_name] = [score] # convert to lists of tuples of ndarrays retval = [] import logging logger = logging.getLogger('bob') # convert that into the desired format return _convert_cmc_scores(neg_dict, pos_dict) for probe_name in sorted(pos_dict.keys()): if probe_name in neg_dict: retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64))) else: logger.warn('For probe name "%s" there are only positive scores. This probe name is ignored.' % probe_name) # test if there are probes for which only negatives exist for probe_name in sorted(neg_dict.keys()): if not probe_name in pos_dict.keys(): logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name) return retval def load_score(filename, ncolumns=None): def load_score(filename, ncolumns = 4): """Load scores using numpy.loadtxt and return the data as a numpy array. **Parameters:** ... ... @@ -333,11 +300,8 @@ def load_score(filename, ncolumns=None): 'claimed_id', 'real_id', 'test_label', and ['model_label'] """ if ncolumns is None: ncolumns = 4 def convertfunc(x): return x convertfunc = lambda x : x if ncolumns == 4: names = ('claimed_id', 'real_id', 'test_label', 'score') ... ... 
@@ -410,3 +374,13 @@ def dump_score(filename, score_lines): else: raise ValueError("Only scores with 4 and 5 columns are supported.") numpy.savetxt(filename, score_lines, fmt=fmt) def _convert_cmc_scores(neg_dict, pos_dict): """Converts the negative and positive scores read with :py:func:`cmc_four_column` or :py:func:`cmc_four_column` into a format that is handled by the :py:func:`bob.measure.cmc` and similar functions.""" # convert to lists of tuples of ndarrays (or None) probe_names = sorted(set(neg_dict.keys()).union(set(pos_dict.keys()))) # get all scores in the desired format return [( numpy.array(neg_dict[probe_name], numpy.float64) if probe_name in neg_dict else None, numpy.array(pos_dict[probe_name], numpy.float64) if probe_name in pos_dict else None ) for probe_name in probe_names]
 ... ... @@ -268,9 +268,11 @@ def test_cmc(): from . import recognition_rate, cmc, load def n(*args): return numpy.array(args) # tests the CMC calculation # test data; should give match characteristics [1/2,1/4,1/3] and CMC [1/3,2/3,1] test_data = [((0.3, 1.1, 0.5), (0.7)), ((1.4, -1.3, 0.6), (0.2)), ((0.8, 0., 1.5), (-0.8, 1.8)), ((2., 1.3, 1.6, 0.9), (2.4))] test_data = [(n(0.3, 1.1, 0.5), n(0.7)), (n(1.4, -1.3, 0.6), n(0.2)), (n(0.8, 0., 1.5), n(-0.8, 1.8)), (n(2., 1.3, 1.6, 0.9), n(2.4))] # compute recognition rate rr = recognition_rate(test_data) nose.tools.eq_(rr, 0.5) ... ... @@ -319,8 +321,8 @@ def test_calibration(): min_cllr = calibration.min_cllr(negatives, positives) assert min_cllr <= cllr assert cllr, 3.61833457 assert min_cllr, 0.337364136 assert abs(cllr - 3.61833) < 1e-5, cllr assert abs(min_cllr - 0.33736) < 1e-5, min_cllr ... ... @@ -351,5 +353,3 @@ def test_open_set_recognition_rate(): assert bob.measure.recognition_rate(cmc_scores, threshold=0.5), 0.857142857143 t = bob.measure.far_threshold(normal_scores[0], normal_scores[1],far_value) assert bob.measure.recognition_rate(cmc_scores, threshold=t), 0.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!