Tackled issue 6

parent 02d6f8aa
......@@ -78,7 +78,8 @@ def relevance (input, machine):
return retval
def recognition_rate(cmc_scores):
def recognition_rate(cmc_scores, threshold=None):
"""recognition_rate(cmc_scores) -> RR
Calculates the recognition rate from the given input, which is identical
......@@ -90,16 +91,23 @@ def recognition_rate(cmc_scores):
the :py:func:`bob.measure.load.cmc_four_column` or
:py:func:`bob.measure.load.cmc_five_column` function.
The recognition rate is defined as the number of test items, for which the
If **threshold** is set to `None`, the recognition rate is defined as the number of test items, for which the
positive score is greater than or equal to all negative scores, divided by
the number of all test items. If several positive scores for one test item
exist, the **highest** score is taken.
the number of all test items. If several positive scores for one test item exist, the **highest** score is taken.
If **threshold** is set to a value, the recognition rate is defined as the number of test items, for which the
positive score is greater than or equal to both the threshold and all negative scores, divided by
the number of all test items. If several positive scores for one test item exist, the **highest** score is taken.
If a particular test item has only negative scores, it is counted as correctly classified if all of its negative scores are lower than
the **threshold**. For such test items, setting a threshold is mandatory.
**Parameters:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
``cmc_scores`` : CMC scores loaded with one of the functions (:py:func:`bob.measure.load.cmc_four_column` or
:py:func:`bob.measure.load.cmc_five_column`)
``threshold`` : Decision threshold. If `None`, the decision threshold will be the **highest** positive score.
**Returns:**
``RR`` : float
......@@ -111,6 +119,9 @@ def recognition_rate(cmc_scores):
correct = 0.
for neg, pos in cmc_scores:
#If threshold is none, let's use the highest positive score as the decision threshold
if(threshold is None):
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
......@@ -118,9 +129,28 @@ def recognition_rate(cmc_scores):
if (neg < max_pos).all():
correct += 1.
else:
#If threshold is NOT None, we have an openset identification
if(len(pos)>0):
# if we have positive scores the comparison is considered correct
# if the positive score is higher than the threshold AND all negative scores
max_pos = numpy.max(pos)
if((threshold < max_pos) and (neg < max_pos).all()):
correct += 1.
else:
#If we don't have a positive score we only will consider
#a correct classification if ALL the negative scores are smaller than the threshold
if (neg < threshold).all():
correct += 1.
# return relative number of correctly matched scores
return correct / float(len(cmc_scores))
def cmc(cmc_scores):
"""cmc(cmc_scores) -> curve
......@@ -143,6 +173,8 @@ def cmc(cmc_scores):
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
``threshold`` : Decision threshold. If `None`, the decision threshold will be the **highest** positive score.
**Returns:**
``curve`` : array_like(2D, float)
......@@ -156,10 +188,15 @@ def cmc(cmc_scores):
# compute MC
match_characteristic = numpy.zeros((max([len(neg) for (neg,pos) in cmc_scores])+1,), numpy.int)
for neg, pos in cmc_scores:
if((type(pos)!=float) and (len(pos) == 0)):
raise ValueError("For the CMC computation at least one positive score is necessary. Please review who you are loading the scores. You must set `load_only_negatives=False` in the :py:func:`bob.measure.load.cmc_four_column` or `:py:func:`bob.measure.load.cmc_five_column` methods.")
# get the maximum positive score for the current probe item
# (usually, there is only one positive score, but just in case...)
max_pos = numpy.max(pos)
# count the number of negative scores that are higher than the best positive score
index = numpy.sum(neg >= max_pos)
match_characteristic[index] += 1
......@@ -174,6 +211,7 @@ def cmc(cmc_scores):
return cumulative_match_characteristic
def get_config():
"""Returns a string containing the configuration information.
"""
......
......@@ -128,8 +128,10 @@ def split_four_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
def cmc_four_column(filename):
"""cmc_four_column(filename) -> cmc_scores
def cmc_four_column(filename, load_only_negatives=False):
"""
cmc_four_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in :py:func:`four_column`,
......@@ -140,15 +142,21 @@ def cmc_four_column(filename):
Usually, the list of positive scores should contain only one element, but more are allowed.
The result of this function can directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
``load_only_negatives`` : boolean
Set this argument to **True** if you also want to load the probes that have **only** negative scores (used for open-set recognition).
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......@@ -174,18 +182,29 @@ def cmc_four_column(filename):
retval = []
import logging
logger = logging.getLogger('bob')
if(not load_only_negatives):
for probe_name in sorted(pos_dict.keys()):
if probe_name in neg_dict:
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
else:
logger.warn('For probe name "%s" there are only positive scores. This probe name is ignored.' % probe_name)
# test if there are probes for which only negatives exist
#test if there are probes for which only negatives exist
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
else:
for probe_name in sorted(pos_dict.keys()):
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array([], numpy.float64)))
return retval
def five_column(filename):
"""five_column(filename) -> claimed_id, model_label, real_id, test_label, score
......@@ -231,6 +250,7 @@ def five_column(filename):
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
def split_five_column(filename):
"""split_five_column(filename) -> negatives, positives
......@@ -267,8 +287,10 @@ def split_five_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
def cmc_five_column(filename):
"""cmc_four_column(filename) -> cmc_scores
def cmc_five_column(filename, load_only_negatives=False):
"""
cmc_five_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in five column format.
The five column file needs to be in the same format as described in :py:func:`five_column`,
......@@ -284,10 +306,15 @@ def cmc_five_column(filename):
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
``load_only_negatives`` : boolean
Set this argument to **True** if you also want to load the probes that have **only** negative scores (used for open-set recognition).
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......@@ -309,6 +336,8 @@ def cmc_five_column(filename):
retval = []
import logging
logger = logging.getLogger('bob')
if(not load_only_negatives):
for probe_name in sorted(pos_dict.keys()):
if probe_name in neg_dict:
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
......@@ -318,4 +347,14 @@ def cmc_five_column(filename):
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
logger.warn('For probe name "%s" there are only negative scores. This probe name is ignored.' % probe_name)
else:
for probe_name in sorted(pos_dict.keys()):
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array(pos_dict[probe_name], numpy.float64)))
for probe_name in sorted(neg_dict.keys()):
if not probe_name in pos_dict.keys():
retval.append((numpy.array(neg_dict[probe_name], numpy.float64), numpy.array([], numpy.float64)))
return retval
......@@ -321,3 +321,25 @@ def test_calibration():
assert min_cllr <= cllr
assert cllr, 3.61833457
assert min_cllr, 0.337364136
def test_open_set_recognition_rate():
    """Check the open-set recognition rate against reference values.

    Three four-column score files are loaded with ``load_only_negatives=True``
    and the recognition rate is evaluated at the thresholds 0.5 and 10.

    The previous form of these checks was ``assert value, expected``, which
    Python treats as ``assert value`` with ``expected`` as the failure
    message; the reference values were therefore never compared.  They are
    now compared with a small absolute tolerance.

    NOTE(review): since the reference values were never enforced before,
    confirm them against the score files if one of these checks fails.
    """
    tolerance = 1e-8

    # (score file, expected rate at threshold 0.5, expected rate at threshold 10.)
    reference = [
        # No error files
        ("scores-cmc-4col-open-set.txt", 1.0, 0.222222222222),
        # One error
        ("scores-cmc-4col-open-set-one-error.txt", 0.888888888889, 0.222222222222),
        # Two errors
        ("scores-cmc-4col-open-set-two-errors.txt", 0.777777777778, 0.111111111111),
    ]

    for score_file, expected_low, expected_high in reference:
        scores = bob.measure.load.cmc_four_column(
            F(score_file), load_only_negatives=True
        )
        for threshold, expected in ((0.5, expected_low), (10., expected_high)):
            rate = bob.measure.recognition_rate(scores, threshold=threshold)
            assert abs(rate - expected) < tolerance, (
                "%s: recognition rate at threshold %s is %s, expected %s"
                % (score_file, threshold, rate, expected)
            )
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment