Commit d7f1095a authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Merge pull request #8 from tiagofrepereira2012/master

Recognition rate under a threshold - Issue #6
parents 02d6f8aa a696ace0
......@@ -78,9 +78,10 @@ def relevance (input, machine):
return retval
def recognition_rate(cmc_scores):
"""recognition_rate(cmc_scores) -> RR
def recognition_rate(cmc_scores, threshold=None):
"""recognition_rate(cmc_scores, threshold) -> RR
Calculates the recognition rate from the given input, which is identical
to the rank 1 (C)MC value.
......@@ -90,16 +91,20 @@ def recognition_rate(cmc_scores):
the :py:func:`bob.measure.load.cmc_four_column` or
:py:func:`bob.measure.load.cmc_five_column` function.
The recognition rate is defined as the number of test items, for which the
If **threshold** is set to `None`, the recognition rate is defined as the number of test items, for which the
positive score is greater than or equal to all negative scores, divided by
the number of all test items. If several positive scores for one test item
exist, the **highest** score is taken.
the number of all test items. If several positive scores for one test item exist, the **highest** score is taken.
If **threshold** is given, the recognition rate is defined as the number of test items for which the
positive score is greater than or equal to all negative scores and to the threshold, divided by
the number of all test items. If several positive scores for one test item exist, the **highest** score is taken.
**Parameters:**
``cmc_scores`` : CMC scores loaded with one of the functions (:py:func:`bob.measure.load.cmc_four_column` or :py:func:`bob.measure.load.cmc_five_column`)
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
``threshold`` : Decision threshold. If `None`, the decision threshold will be the **highest** positive score.
**Returns:**
``RR`` : float
......@@ -111,16 +116,27 @@ def recognition_rate(cmc_scores):
correct = 0.
for neg, pos in cmc_scores:
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
# check if the positive score is greater than all negative scores
if (neg < max_pos).all():
correct += 1.
#If threshold is none, let's use the highest positive score as the decision threshold
if(threshold is None):
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
# check if the positive score is greater than all negative scores
if (neg < max_pos).all():
correct += 1.
else:
#If threshold is NOT None, we have an openset identification
max_pos = numpy.max(pos)
if((threshold < max_pos) and (neg < max_pos).all()):
correct += 1.
# return relative number of correctly matched scores
return correct / float(len(cmc_scores))
def cmc(cmc_scores):
"""cmc(cmc_scores) -> curve
......@@ -143,6 +159,8 @@ def cmc(cmc_scores):
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
``threshold`` : Decision threshold. If `None`, the decision threshold will be the **highest** positive score.
**Returns:**
``curve`` : array_like(2D, float)
......@@ -156,14 +174,19 @@ def cmc(cmc_scores):
# compute MC
match_characteristic = numpy.zeros((max([len(neg) for (neg,pos) in cmc_scores])+1,), numpy.int)
for neg, pos in cmc_scores:
if((type(pos)!=float) and (len(pos) == 0)):
raise ValueError("For the CMC computation at least one positive score is necessary. Please review who you are loading the scores. You must set `load_only_negatives=False` in the :py:func:`bob.measure.load.cmc_four_column` or `:py:func:`bob.measure.load.cmc_five_column` methods.")
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
# count the number of negative scores that are higher than the best positive score
index = numpy.sum(neg >= max_pos)
match_characteristic[index] += 1
# count the number of negative scores that are higher than the best positive score
index = numpy.sum(neg >= max_pos)
match_characteristic[index] += 1
# cumulate
cumulative_match_characteristic = numpy.ndarray(match_characteristic.shape, numpy.float64)
count = 0.
......@@ -174,6 +197,7 @@ def cmc(cmc_scores):
return cumulative_match_characteristic
def get_config():
"""Returns a string containing the configuration information.
"""
......
1 1 probe_1 1
1 1 probe_2 1
1 2 probe_3 0
1 2 probe_4 0
1 2 probe_5 0
1 3 probe_6 0
1 3 probe_7 0
1 4 probe_8 0
1 5 probe_9 0
2 1 probe_1 0
2 1 probe_2 0
2 2 probe_3 1
2 2 probe_4 1
2 2 probe_5 1
2 3 probe_6 0
2 3 probe_7 0
2 4 probe_8 0
2 5 probe_9 0
3 1 probe_1 0
3 1 probe_2 0
3 2 probe_3 0
3 2 probe_4 0
3 2 probe_5 0
3 3 probe_6 1
3 3 probe_7 -0.1
3 4 probe_8 0
3 5 probe_9 0
1 1 probe_1 1
1 1 probe_2 1
1 2 probe_3 0
1 2 probe_4 0
1 2 probe_5 0
1 3 probe_6 0
1 3 probe_7 0
1 4 probe_8 0
1 5 probe_9 0
2 1 probe_1 0
2 1 probe_2 0
2 2 probe_3 1
2 2 probe_4 1
2 2 probe_5 1
2 3 probe_6 0
2 3 probe_7 0
2 4 probe_8 0
2 5 probe_9 0
3 1 probe_1 0
3 1 probe_2 0
3 2 probe_3 0
3 2 probe_4 0
3 2 probe_5 0
3 3 probe_6 1
3 3 probe_7 -0.1
3 4 probe_8 0
3 5 probe_9 10
1 1 probe_1 1
1 1 probe_2 1
1 2 probe_3 0
1 2 probe_4 0
1 2 probe_5 0
1 3 probe_6 0
1 3 probe_7 0
1 4 probe_8 0
1 5 probe_9 0
2 1 probe_1 0
2 1 probe_2 0
2 2 probe_3 1
2 2 probe_4 1
2 2 probe_5 1
2 3 probe_6 0
2 3 probe_7 0
2 4 probe_8 0
2 5 probe_9 0
3 1 probe_1 0
3 1 probe_2 0
3 2 probe_3 0
3 2 probe_4 0
3 2 probe_5 0
3 3 probe_6 1
3 3 probe_7 1
3 4 probe_8 0
3 5 probe_9 0
......@@ -130,6 +130,7 @@ def split_four_column(filename):
def cmc_four_column(filename):
"""cmc_four_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in :py:func:`four_column`,
......@@ -140,20 +141,23 @@ def cmc_four_column(filename):
Usually, the list of positive scores should contain only one element, but more are allowed.
The result of this function can directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
neg_dict = {}
# read four column list
# read four column list
for (client_id, probe_id, probe_name, score_str) in four_column(filename):
try:
score = float(score_str)
......@@ -179,12 +183,15 @@ def cmc_four_column(filename):
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
else:
logger.warn('For probe name "%s" there are only positive scores. This probe name is ignored.' % probe_name)
# test if there are probes for which only negatives exist
#test if there are probes for which only negatives exist
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
return retval
def five_column(filename):
"""five_column(filename) -> claimed_id, model_label, real_id, test_label, score
......@@ -231,6 +238,7 @@ def five_column(filename):
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
def split_five_column(filename):
"""split_five_column(filename) -> negatives, positives
......@@ -267,9 +275,10 @@ def split_five_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
def cmc_five_column(filename):
"""cmc_five_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in five column format.
The five column file needs to be in the same format as described in :py:func:`five_column`,
and the ``test_label`` (column 4) has to contain the test/probe file name or a probe id.
......@@ -288,6 +297,7 @@ def cmc_five_column(filename):
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......@@ -309,6 +319,7 @@ def cmc_five_column(filename):
retval = []
import logging
logger = logging.getLogger('bob')
for probe_name in sorted(pos_dict.keys()):
if probe_name in neg_dict:
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
......@@ -318,4 +329,5 @@ def cmc_five_column(filename):
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
return retval
......@@ -281,7 +281,7 @@ def write_score_file(
equal_indices = numpy.where(mask[p] == 0xff)
if len(equal_indices):
# model id found, use the first one
probes_ids.append(models_ids[equal_indices[0]])
probes_ids.append(models_ids[equal_indices[0][0]])
else:
# no model found; add non-existing id
probes_ids.append("unknown")
......
......@@ -321,3 +321,35 @@ def test_calibration():
assert min_cllr <= cllr
assert cllr, 3.61833457
assert min_cllr, 0.337364136
def test_open_set_recognition_rate():
# Calls bob.measure.recognition_rate on CMC scores loaded from a four-column
# score file in three ways: without a threshold, with a fixed threshold of
# 0.5, and with a threshold obtained from bob.measure.far_threshold at
# ``far_value``.
#
# NOTE(review): every check below is written ``assert <rate>, <number>``.
# In Python the expression after the comma is the assertion *message*, not a
# value to compare against, so each statement only verifies that the returned
# rate is truthy (non-zero). The numbers 1.0 / 0.857142857143 / 0.0 are never
# compared with the result. If the last call really returned 0.0, that
# assert would fail, because 0.0 is falsy.
#
# NOTE(review): all three sections load the same file name
# ("scores-cmc-4col-open-set.txt") although their headings and the numbers
# written after the asserts differ -- presumably the "one error" and
# "two errors" sections were meant to load separate fixtures; confirm.
# False acceptance rate passed to far_threshold in each section.
far_value = 0.01
#No error files
cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set.txt"))
# presumably (negatives, positives), as documented for split_five_column;
# verify for split_four_column
normal_scores = bob.measure.load.split_four_column(F("scores-cmc-4col-open-set.txt"))
assert bob.measure.recognition_rate(cmc_scores), 1.0
assert bob.measure.recognition_rate(cmc_scores, threshold=0.5), 1.0
t = bob.measure.far_threshold(normal_scores[0], normal_scores[1],far_value)
assert bob.measure.recognition_rate(cmc_scores, threshold=t), 1.0
#One error
cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set.txt"))
normal_scores = bob.measure.load.split_four_column(F("scores-cmc-4col-open-set.txt"))
assert bob.measure.recognition_rate(cmc_scores), 0.857142857143
assert bob.measure.recognition_rate(cmc_scores, threshold=0.5), 0.857142857143
t = bob.measure.far_threshold(normal_scores[0], normal_scores[1],far_value)
assert bob.measure.recognition_rate(cmc_scores, threshold=t), 0.857142857143
#Two errors
cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set.txt"))
normal_scores = bob.measure.load.split_four_column(F("scores-cmc-4col-open-set.txt"))
assert bob.measure.recognition_rate(cmc_scores), 0.857142857143
assert bob.measure.recognition_rate(cmc_scores, threshold=0.5), 0.857142857143
t = bob.measure.far_threshold(normal_scores[0], normal_scores[1],far_value)
# NOTE(review): written with 0.0 after the comma; see the note at the top of
# this function about what this form actually checks.
assert bob.measure.recognition_rate(cmc_scores, threshold=t), 0.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment