Commit 95b96cff authored by Manuel Günther's avatar Manuel Günther

Implemented generic functions to load and split scores; added (first) test...

Implemented generic functions to load and split scores; added (first) test case for split_four_column
parent 48f7d32d
Pipeline #7446 passed with stages
in 15 minutes and 48 seconds
......@@ -263,6 +263,92 @@ def cmc_five_column(filename):
return _split_cmc_scores(score_lines, 2)
def scores(filename, ncolumns=None):
    """scores(filename, ncolumns=None) -> tuple

    Loads the scores from the given score file and yields its lines.

    Depending on the score file format, four or five elements are yielded per line, see :py:func:`bob.measure.load.four_column` and :py:func:`bob.measure.load.five_column` for details.

    Parameters:

    filename: :py:class:`str`, ``file-like``:
      The file object that will be opened with :py:func:`open_file` containing the scores.

    ncolumns: any
      ignored

    Yields:

    tuple:
      see :py:func:`bob.measure.load.four_column` or :py:func:`bob.measure.load.five_column`
    """
    # ``ncolumns`` is not used here; the line iterator is handed back as-is.
    score_lines = _iterate_score_file(filename)
    return score_lines
def split(filename, ncolumns=None):
    """split(filename, ncolumns=None) -> negatives, positives

    Loads the scores from the given score file and splits them into positives and negatives.

    Depending on the score file format, it calls :py:func:`bob.measure.load.split_four_column` or :py:func:`bob.measure.load.split_five_column`; see these functions for details.

    Parameters:

    filename: :py:class:`str`, ``file-like``:
      The file object that will be opened with :py:func:`open_file` containing the scores.

    ncolumns: int or ``None``
      If specified to be ``4`` or ``5``, the score file will be assumed to be in the given format.
      If not specified, the score file format will be estimated automatically

    Returns:

    negatives: 1D :py:class:`numpy.ndarray` of type float
      This array contains the list of scores, for which the ``claimed_id`` and the ``real_id`` are different (see :py:func:`four_column`)

    positives: 1D :py:class:`numpy.ndarray` of type float
      This array contains the list of scores, for which the ``claimed_id`` and the ``real_id`` are identical (see :py:func:`four_column`)

    Raises:

    ValueError:
      If the number of columns determined for the score file is neither ``4`` nor ``5``.
    """
    ncolumns = _estimate_score_file_format(filename, ncolumns)
    if ncolumns == 4:
        return split_four_column(filename)
    if ncolumns == 5:
        return split_five_column(filename)
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # under ``python -O``, which would silently route unsupported files to the
    # five-column parser.
    raise ValueError(
        'Unsupported score file format: expected 4 or 5 columns, '
        'but found {}'.format(ncolumns))
def cmc(filename, ncolumns=None):
    """cmc(filename, ncolumns=None) -> list

    Loads scores to compute CMC curves.

    Depending on the score file format, it calls :py:func:`bob.measure.load.cmc_four_column` or :py:func:`bob.measure.load.cmc_five_column`; see these functions for details.

    Parameters:

    filename: :py:class:`str`, ``file-like``:
      The file object that will be opened with :py:func:`open_file` containing the scores.

    ncolumns: int or ``None``
      If specified to be ``4`` or ``5``, the score file will be assumed to be in the given format.
      If not specified, the score file format will be estimated automatically

    Returns:

    list: [(neg,pos)]
      A list of tuples, where each tuple contains the ``negative`` and
      ``positive`` scores for one probe of the database.

    Raises:

    ValueError:
      If the number of columns determined for the score file is neither ``4`` nor ``5``.
    """
    ncolumns = _estimate_score_file_format(filename, ncolumns)
    if ncolumns == 4:
        return cmc_four_column(filename)
    if ncolumns == 5:
        return cmc_five_column(filename)
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # under ``python -O``, which would silently route unsupported files to the
    # five-column parser.
    raise ValueError(
        'Unsupported score file format: expected 4 or 5 columns, '
        'but found {}'.format(ncolumns))
def load_score(filename, ncolumns=None, minimal=False, **kwargs):
"""Load scores using numpy.loadtxt and return the data as a numpy array.
......@@ -291,17 +377,7 @@ def load_score(filename, ncolumns=None, minimal=False, **kwargs):
def convertfunc(x):
return x
if ncolumns not in (4, 5):
f = open_file(filename)
try:
line = f.readline()
ncolumns = len(line.split())
except Exception:
logger.warn('Could not guess the number of columns in file: {}. '
'Assuming 4 column format.'.format(filename))
ncolumns = 4
finally:
f.close()
ncolumns = _estimate_score_file_format(filename, ncolumns)
usecols = kwargs.pop('usecols', None)
if ncolumns == 4:
......@@ -393,6 +469,26 @@ def dump_score(filename, score_lines):
numpy.savetxt(filename, score_lines, fmt=fmt)
def _estimate_score_file_format(filename, ncolumns=None):
"""Estimates the score file format from the given score file.
If ``ncolumns`` is in ``(4,5)``, then ``ncolumns`` is returned instead.
"""
if ncolumns in (4, 5):
return ncolumns
f = open_file(filename, 'rb')
try:
line = f.readline()
ncolumns = len(line.split())
except Exception:
logger.warn('Could not guess the number of columns in file: {}. '
'Assuming 4 column format.'.format(filename))
ncolumns = 4
finally:
f.close()
return ncolumns
def _iterate_score_file(filename):
"""Opens the score file for reading and yields the score file line by line in a tuple/list.
......
......@@ -75,21 +75,7 @@ def main(user_input=None):
from .. import load
# Loads score file
f = load.open_file(args['<scores>'])
try:
line = f.readline()
ncolumns = len(line.split())
except Exception:
logger.warn('Could not guess the number of columns in file: {}. '
'Assuming 4 column format.'.format(args['<scores>']))
ncolumns = 4
finally:
f.close()
if ncolumns == 4:
data = load.cmc_four_column(args['<scores>'])
else:
data = load.cmc_five_column(args['<scores>'])
data = load.cmc(args['<scores>'])
# compute recognition rate
from .. import recognition_rate
......
......@@ -308,6 +308,13 @@ def test_cmc():
cmc_ = cmc(data)
assert (cmc_ == desired_cmc).all()
data = load.cmc(F('scores-cmc-5col.txt'))
rr = recognition_rate(data)
nose.tools.eq_(rr, desired_rr)
cmc_ = cmc(data)
assert (cmc_ == desired_cmc).all()
def test_calibration():
......@@ -342,7 +349,7 @@ def test_calibration():
def test_open_set_rates():
# No error files
cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set.txt"))
cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set.txt"))
assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8
assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8
......@@ -350,7 +357,7 @@ def test_open_set_rates():
assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8
# One error
cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set-one-error.txt"))
cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set-one-error.txt"))
assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8
assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8
......
......@@ -16,7 +16,6 @@ import bob.io.base.test_utils
def test_load_scores():
# This function tests the IO functionality of loading score files in different ways
scores = []
load_functions = {'4col' : bob.measure.load.four_column, '5col' : bob.measure.load.five_column}
cols = {'4col' : 4, '5col' : 5}
......@@ -36,6 +35,55 @@ def test_load_scores():
assert all(len(c) == cols[variant] for c in compressed_scores)
assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant]))
### Use auto-estimated score file contents
# read score file in normal way
normal_scores = list(bob.measure.load.scores(normal_score_file))
assert len(normal_scores) == 910
assert all(len(s) == cols[variant] for s in normal_scores)
# read the compressed score file
compressed_scores = list(bob.measure.load.scores(compressed_score_file))
assert len(compressed_scores) == len(normal_scores)
assert all(len(c) == cols[variant] for c in compressed_scores)
assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant]))
def test_split_scores():
    """Checks that score files are split into the expected numbers of negatives and positives.

    For each of the 4 and 5 column variants, the plain and the compressed score
    file are split once with the format-specific function and once with the
    auto-estimating :py:func:`bob.measure.load.split`.
    """
    split_functions = {
        '4col': bob.measure.load.split_four_column,
        '5col': bob.measure.load.split_five_column,
    }
    cols = {'4col': 4, '5col': 5}

    def _check_counts(result):
        # every variant of the test data is expected to hold 520 negatives and 390 positives
        negatives, positives = result
        assert len(negatives) == 520, len(negatives)
        assert len(positives) == 390, len(positives)

    for variant in cols:
        explicit_split = split_functions[variant]

        # plain text score file, format-specific function
        normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure')
        _check_counts(explicit_split(normal_score_file))

        # compressed score file, format-specific function
        compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure')
        _check_counts(explicit_split(compressed_score_file))

        # same files again, this time with the auto-estimated score file format
        _check_counts(bob.measure.load.split(normal_score_file))
        _check_counts(bob.measure.load.split(compressed_score_file))
def test_load_score():
# This function tests the IO functionality of loading score files in different ways
......
......@@ -390,8 +390,7 @@ Usually, there is only a single positive score per probe, but this is not a fixe
.. note::
The complex data structure can be read from our default 4 or 5 column score
files using the :py:func:`bob.measure.load.cmc_four_column` or
:py:func:`bob.measure.load.cmc_five_column` function.
files using the :py:func:`bob.measure.load.cmc` function.
Detection & Identification Curve
......
......@@ -69,6 +69,9 @@ Loading data
.. autosummary::
bob.measure.load.open_file
bob.measure.load.scores
bob.measure.load.split
bob.measure.load.cmc
bob.measure.load.four_column
bob.measure.load.split_four_column
bob.measure.load.cmc_four_column
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment