Commit 5b69b076 authored by Manuel Günther

Implemented RR with threshold correctly; changed load.cmc_*_column to allow...

Implemented RR with threshold correctly; changed load.cmc_*_column to allow empty (None) positive and negative scores
parent 9c5ef6c4
......@@ -124,20 +124,46 @@ def recognition_rate(cmc_scores, rank = None, threshold=None):
rank = 1
correct = 0
counter = 0
for neg, pos in cmc_scores:
if((type(pos)!=float) and (len(pos) == 0)):
raise ValueError("For the CMC computation at least one positive score per pair is necessary.")
if pos is None and neg is None:
raise ValueError("One pair of the CMC scores has neither positive nor negative values")
# filter out any negative or positive scores below threshold
if threshold is not None and neg is not None:
neg = numpy.array(neg[neg >= threshold])
if pos is None:
# no positives, so we definitely do not have a match;
# check if we have negatives above threshold
if not neg.ndim:
# we have no negative scores over the threshold, so we have correctly rejected the probe
# don't increase any of the two counters...
continue
# we have negatives over threshold, so we have incorrect classifications; independent on the actual rank
counter += 1
else:
# we have a positive, so we need to count the probe
counter += 1
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
if threshold is not None and max_pos < threshold:
# we have filtered out all positives, so any match is incorrect
continue
if neg is None or not neg.ndim:
# if we had no negatives, or all negatives were below threshold, we have a match at rank 1
correct += 1
else:
# count the number of negative scores that are higher than the best positive score
index = numpy.sum(neg >= max_pos)
if index < rank and (threshold is None or threshold <= max_pos):
if index < rank and (threshold is None or max_pos >= threshold):
correct += 1
return correct / float(len(cmc_scores))
return float(correct) / float(counter)
def cmc(cmc_scores, threshold = None):
......
......@@ -10,6 +10,9 @@ import numpy
import tarfile
import os
import logging
logger = logging.getLogger('bob.measure')
def open_file(filename, mode='rt'):
"""open_file(filename) -> file_like
......@@ -122,7 +125,6 @@ def split_four_column(filename):
def cmc_four_column(filename):
"""cmc_four_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in :py:func:`four_column`,
and the ``test_label`` (column 3) has to contain the test/probe file name or a probe id.
......@@ -141,47 +143,28 @@ def cmc_four_column(filename):
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
``cmc_scores`` : [(negatives, positives)]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database.
Both ``negatives`` and ``positives`` can be either an 1D :py:class:`numpy.ndarray` of type ``float``, or ``None``.
"""
# extract positives and negatives
pos_dict = {}
neg_dict = {}
# read four column list
for (client_id, probe_id, probe_name, score_str) in four_column(filename):
try:
score = float(score_str)
for (client_id, probe_id, probe_name, score) in four_column(filename):
# check in which dict we have to put the score
if client_id == probe_id:
correct_dict = pos_dict
else:
correct_dict = neg_dict
correct_dict = pos_dict if client_id == probe_id else neg_dict
# append score
if probe_name in correct_dict:
correct_dict[probe_name].append(score)
else:
correct_dict[probe_name] = [score]
except:
raise SyntaxError("Cannot convert score '%s' to float" % score_str)
# convert to lists of tuples of ndarrays
retval = []
import logging
logger = logging.getLogger('bob')
for probe_name in sorted(pos_dict.keys()):
if probe_name in neg_dict:
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
else:
logger.warn('For probe name "%s" there are only positive scores. This probe name is ignored.' % probe_name)
#test if there are probes for which only negatives exist
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
# convert that into the desired format
return _convert_cmc_scores(neg_dict, pos_dict)
return retval
def five_column(filename):
......@@ -286,35 +269,19 @@ def cmc_five_column(filename):
# read four column list
for (client_id, _, probe_id, probe_name, score) in five_column(filename):
# check in which dict we have to put the score
if client_id == probe_id:
correct_dict = pos_dict
else:
correct_dict = neg_dict
correct_dict = pos_dict if client_id == probe_id else neg_dict
# append score
if probe_name in correct_dict:
correct_dict[probe_name].append(score)
else:
correct_dict[probe_name] = [score]
# convert to lists of tuples of ndarrays
retval = []
import logging
logger = logging.getLogger('bob')
# convert that into the desired format
return _convert_cmc_scores(neg_dict, pos_dict)
for probe_name in sorted(pos_dict.keys()):
if probe_name in neg_dict:
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
else:
logger.warn('For probe name "%s" there are only positive scores. This probe name is ignored.' % probe_name)
# test if there are probes for which only negatives exist
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
return retval
def load_score(filename, ncolumns=None):
def load_score(filename, ncolumns = 4):
"""Load scores using numpy.loadtxt and return the data as a numpy array.
**Parameters:**
......@@ -333,11 +300,8 @@ def load_score(filename, ncolumns=None):
'claimed_id', 'real_id', 'test_label', and ['model_label']
"""
if ncolumns is None:
ncolumns = 4
def convertfunc(x):
return x
convertfunc = lambda x : x
if ncolumns == 4:
names = ('claimed_id', 'real_id', 'test_label', 'score')
......@@ -410,3 +374,13 @@ def dump_score(filename, score_lines):
else:
raise ValueError("Only scores with 4 and 5 columns are supported.")
numpy.savetxt(filename, score_lines, fmt=fmt)
def _convert_cmc_scores(neg_dict, pos_dict):
    """Converts the negative and positive scores read with :py:func:`cmc_four_column` or :py:func:`cmc_five_column` into a format that is handled by the :py:func:`bob.measure.cmc` and similar functions.

    **Parameters:**

    ``neg_dict`` : {probe_name : [score]}
      The negative scores, grouped by probe name.

    ``pos_dict`` : {probe_name : [score]}
      The positive scores, grouped by probe name.

    **Returns:**

    ``cmc_scores`` : [(negatives, positives)]
      One tuple per probe name that occurs in at least one of the two dictionaries, ordered by sorted probe name.
      ``negatives`` and ``positives`` are 1D :py:class:`numpy.ndarray` of type ``float64``, or ``None`` when the probe name is missing from the respective dictionary.
    """
    # Every probe seen in either dictionary gets an entry; sorting keeps the output order deterministic.
    probe_names = sorted(set(neg_dict) | set(pos_dict))

    def _as_array(scores_by_probe, probe_name):
        # A probe without scores of this kind is reported as None, not as an empty array.
        if probe_name not in scores_by_probe:
            return None
        return numpy.array(scores_by_probe[probe_name], numpy.float64)

    # Build the list of (negatives, positives) tuples (ndarrays or None).
    return [
        (_as_array(neg_dict, probe_name), _as_array(pos_dict, probe_name))
        for probe_name in probe_names
    ]
......@@ -268,9 +268,11 @@ def test_cmc():
from . import recognition_rate, cmc, load
def n(*args):
return numpy.array(args)
# tests the CMC calculation
# test data; should give match characteristics [1/2,1/4,1/3] and CMC [1/3,2/3,1]
test_data = [((0.3, 1.1, 0.5), (0.7)), ((1.4, -1.3, 0.6), (0.2)), ((0.8, 0., 1.5), (-0.8, 1.8)), ((2., 1.3, 1.6, 0.9), (2.4))]
test_data = [(n(0.3, 1.1, 0.5), n(0.7)), (n(1.4, -1.3, 0.6), n(0.2)), (n(0.8, 0., 1.5), n(-0.8, 1.8)), (n(2., 1.3, 1.6, 0.9), n(2.4))]
# compute recognition rate
rr = recognition_rate(test_data)
nose.tools.eq_(rr, 0.5)
......@@ -319,8 +321,8 @@ def test_calibration():
min_cllr = calibration.min_cllr(negatives, positives)
assert min_cllr <= cllr
assert cllr, 3.61833457
assert min_cllr, 0.337364136
assert abs(cllr - 3.61833) < 1e-5, cllr
assert abs(min_cllr - 0.33736) < 1e-5, min_cllr
......@@ -351,5 +353,3 @@ def test_open_set_recognition_rate():
assert bob.measure.recognition_rate(cmc_scores, threshold=0.5), 0.857142857143
t = bob.measure.far_threshold(normal_scores[0], normal_scores[1],far_value)
assert bob.measure.recognition_rate(cmc_scores, threshold=t), 0.0
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment