diff --git a/bob/measure/load.py b/bob/measure/load.py index 5ed4035e275ca655079466ad5d90b727a1bb83c1..41e6e6cbf29ce9f486b04d8111c00e0d40aaa493 100644 --- a/bob/measure/load.py +++ b/bob/measure/load.py @@ -263,6 +263,92 @@ def cmc_five_column(filename): return _split_cmc_scores(score_lines, 2) +def scores(filename, ncolumns=None): + """scores(filename, ncolumns=None) -> tuple + + Loads the scores from the given score file and yields its lines. + Depending on the score file format, four or five elements are yielded, see :py:func:`bob.measure.load.four_column` and :py:func:`bob.measure.load.five_column` for details. + + Parameters: + + filename: :py:class:`str`, ``file-like``: + The file object that will be opened with :py:func:`open_file` containing the scores. + + ncolumns: any + ignored + + Yields: + + tuple: + see :py:func:`bob.measure.load.four_column` or :py:func:`bob.measure.load.five_column` + """ + return _iterate_score_file(filename) + + +def split(filename, ncolumns=None): + """split(filename, ncolumns=None) -> negatives, positives + + Loads the scores from the given score file and splits them into positives and negatives. + + Depending on the score file format, it calls :py:func:`bob.measure.load.split_four_column` or :py:func:`bob.measure.load.split_five_column`; see these functions for details. + + Parameters: + + filename: :py:class:`str`, ``file-like``: + The file object that will be opened with :py:func:`open_file` containing the scores. + + ncolumns: int or ``None`` + If specified to be ``4`` or ``5``, the score file will be assumed to be in the given format. 
+ If not specified, the score file format will be estimated automatically + + Returns: + + negatives: 1D :py:class:`numpy.ndarray` of type float + This array contains the list of scores, for which the ``claimed_id`` and the ``real_id`` are different (see :py:func:`four_column`) + + positives: 1D :py:class:`numpy.ndarray` of type float + This array contains the list of scores, for which the ``claimed_id`` and the ``real_id`` are identical (see :py:func:`four_column`) + + """ + ncolumns = _estimate_score_file_format(filename, ncolumns) + if ncolumns == 4: + return split_four_column(filename) + else: + assert ncolumns == 5 + return split_five_column(filename) + + +def cmc(filename, ncolumns=None): + """cmc(filename, ncolumns=None) -> list + + Loads scores to compute CMC curves. + + Depending on the score file format, it calls :py:func:`bob.measure.load.cmc_four_column` or :py:func:`bob.measure.load.cmc_five_column`; see these functions for details. + + Parameters: + + filename: :py:class:`str`, ``file-like``: + The file object that will be opened with :py:func:`open_file` containing the scores. + + ncolumns: int or ``None`` + If specified to be ``4`` or ``5``, the score file will be assumed to be in the given format. + If not specified, the score file format will be estimated automatically + + Returns: + + list: [(neg,pos)] + A list of tuples, where each tuple contains the ``negative`` and + ``positive`` scores for one probe of the database. + """ + ncolumns = _estimate_score_file_format(filename, ncolumns) + if ncolumns == 4: + return cmc_four_column(filename) + else: + assert ncolumns == 5 + return cmc_five_column(filename) + + + def load_score(filename, ncolumns=None, minimal=False, **kwargs): """Load scores using numpy.loadtxt and return the data as a numpy array. 
@@ -291,17 +377,7 @@ def load_score(filename, ncolumns=None, minimal=False, **kwargs): def convertfunc(x): return x - if ncolumns not in (4, 5): - f = open_file(filename) - try: - line = f.readline() - ncolumns = len(line.split()) - except Exception: - logger.warn('Could not guess the number of columns in file: {}. ' - 'Assuming 4 column format.'.format(filename)) - ncolumns = 4 - finally: - f.close() + ncolumns = _estimate_score_file_format(filename, ncolumns) usecols = kwargs.pop('usecols', None) if ncolumns == 4: @@ -393,6 +469,26 @@ def dump_score(filename, score_lines): numpy.savetxt(filename, score_lines, fmt=fmt) +def _estimate_score_file_format(filename, ncolumns=None): + """Estimates the score file format from the given score file. + If ``ncolumns`` is in ``(4,5)``, then ``ncolumns`` is returned instead. + """ + if ncolumns in (4, 5): + return ncolumns + + f = open_file(filename, 'rb') + try: + line = f.readline() + ncolumns = len(line.split()) + except Exception: + logger.warn('Could not guess the number of columns in file: {}. ' + 'Assuming 4 column format.'.format(filename)) + ncolumns = 4 + finally: + f.close() + return ncolumns + + def _iterate_score_file(filename): """Opens the score file for reading and yields the score file line by line in a tuple/list. diff --git a/bob/measure/script/plot_cmc.py b/bob/measure/script/plot_cmc.py index 583b2036ecedf3c6a5739401982cc6d883fd645c..d2d6dd16b657a9815da073a4139eb7ceb50500f2 100644 --- a/bob/measure/script/plot_cmc.py +++ b/bob/measure/script/plot_cmc.py @@ -75,21 +75,7 @@ def main(user_input=None): from .. import load # Loads score file - f = load.open_file(args['<scores>']) - try: - line = f.readline() - ncolumns = len(line.split()) - except Exception: - logger.warn('Could not guess the number of columns in file: {}. 
' - 'Assuming 4 column format.'.format(args['<scores>'])) - ncolumns = 4 - finally: - f.close() - - if ncolumns == 4: - data = load.cmc_four_column(args['<scores>']) - else: - data = load.cmc_five_column(args['<scores>']) + data = load.cmc(args['<scores>']) # compute recognition rate from .. import recognition_rate diff --git a/bob/measure/test_error.py b/bob/measure/test_error.py index 65b70787ce9aafbc7025505fd298566933bad416..75de57c811e2f6760712cfec84470ad60a1d8d9b 100644 --- a/bob/measure/test_error.py +++ b/bob/measure/test_error.py @@ -308,6 +308,13 @@ def test_cmc(): cmc_ = cmc(data) assert (cmc_ == desired_cmc).all() + data = load.cmc(F('scores-cmc-5col.txt')) + rr = recognition_rate(data) + nose.tools.eq_(rr, desired_rr) + cmc_ = cmc(data) + assert (cmc_ == desired_cmc).all() + + def test_calibration(): @@ -342,7 +349,7 @@ def test_calibration(): def test_open_set_rates(): # No error files - cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set.txt")) + cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set.txt")) assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8 @@ -350,7 +357,7 @@ def test_open_set_rates(): assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8 # One error - cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set-one-error.txt")) + cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set-one-error.txt")) assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 6./7.) 
< 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8 diff --git a/bob/measure/test_io.py b/bob/measure/test_io.py index ad62468bb1f99fb7748633644705b60aae01a25d..84fdc3d9eac8d47c34fcaab95b57ce810eb7b6e5 100644 --- a/bob/measure/test_io.py +++ b/bob/measure/test_io.py @@ -16,7 +16,6 @@ import bob.io.base.test_utils def test_load_scores(): # This function tests the IO functionality of loading score files in different ways - scores = [] load_functions = {'4col' : bob.measure.load.four_column, '5col' : bob.measure.load.five_column} cols = {'4col' : 4, '5col' : 5} @@ -36,6 +35,55 @@ def test_load_scores(): assert all(len(c) == cols[variant] for c in compressed_scores) assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant])) + ### Use auto-estimated score file contents + # read score file in normal way + normal_scores = list(bob.measure.load.scores(normal_score_file)) + + assert len(normal_scores) == 910 + assert all(len(s) == cols[variant] for s in normal_scores) + + # read the compressed score file + compressed_scores = list(bob.measure.load.scores(compressed_score_file)) + + assert len(compressed_scores) == len(normal_scores) + assert all(len(c) == cols[variant] for c in compressed_scores) + assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant])) + + +def test_split_scores(): + # This function tests the IO functionality of loading score files in different ways + + split_functions = {'4col' : bob.measure.load.split_four_column, '5col' : bob.measure.load.split_five_column} + cols = {'4col' : 4, '5col' : 5} + + for variant in cols: + # read score file in normal way + normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') + negatives, positives = split_functions[variant](normal_score_file) + + assert len(negatives) == 520, len(negatives) + assert len(positives) == 390, len(positives) + + # read the compressed score 
file + compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure') + negatives, positives = split_functions[variant](compressed_score_file) + + assert len(negatives) == 520, len(negatives) + assert len(positives) == 390, len(positives) + + ### Use auto-estimated score file contents + # read score file in normal way + negatives, positives = bob.measure.load.split(normal_score_file) + + assert len(negatives) == 520, len(negatives) + assert len(positives) == 390, len(positives) + + # read the compressed score file + negatives, positives = bob.measure.load.split(compressed_score_file) + + assert len(negatives) == 520, len(negatives) + assert len(positives) == 390, len(positives) + def test_load_score(): # This function tests the IO functionality of loading score files in different ways diff --git a/doc/guide.rst b/doc/guide.rst index d08f4e928002305afbccbcdac21f85c16b79f044..44c3b2fff38e7d5dd204d48f5fed939210d8549f 100644 --- a/doc/guide.rst +++ b/doc/guide.rst @@ -390,8 +390,7 @@ Usually, there is only a single positive score per probe, but this is not a fixe .. note:: The complex data structure can be read from our default 4 or 5 column score - files using the :py:func:`bob.measure.load.cmc_four_column` or - :py:func:`bob.measure.load.cmc_five_column` function. + files using the :py:func:`bob.measure.load.cmc` function. Detection & Identification Curve diff --git a/doc/py_api.rst b/doc/py_api.rst index 55875a92b94c8a24bf4aec29a617777b37265529..356a55aeeda08114413e06932eacfff1b1a8b7c3 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -69,6 +69,9 @@ Loading data .. autosummary:: bob.measure.load.open_file + bob.measure.load.scores + bob.measure.load.split + bob.measure.load.cmc bob.measure.load.four_column bob.measure.load.split_four_column bob.measure.load.cmc_four_column