diff --git a/bob/measure/load.py b/bob/measure/load.py index 90a676a4a28a2d4d521b6bb52a73b3cf0932875c..290cc2ac4b4b0be8e19dff0b575454ef268bb0a6 100644 --- a/bob/measure/load.py +++ b/bob/measure/load.py @@ -264,8 +264,7 @@ def cmc_five_column(filename): return _split_cmc_scores(score_lines, 2) - -def load_score(filename, ncolumns=None): +def load_score(filename, ncolumns=None, minimal=False, **kwargs): """Load scores using numpy.loadtxt and return the data as a numpy array. Parameters: @@ -277,6 +276,11 @@ def load_score(filename, ncolumns=None): specifying the number of columns in the score file. If None is provided, the number of columns will be guessed. + minimal (:py:class:`bool`, optional): If True, only loads ``claimed_id``, ``real_id``, + and ``score``. + + **kwargs: Keyword arguments passed to :py:func:`numpy.genfromtxt` + Returns: @@ -300,6 +304,7 @@ def load_score(filename, ncolumns=None): finally: f.close() + usecols = kwargs.pop('usecols', None) if ncolumns == 4: names = ('claimed_id', 'real_id', 'test_label', 'score') converters = { @@ -307,6 +312,8 @@ def load_score(filename, ncolumns=None): 1: convertfunc, 2: convertfunc, 3: float} + if minimal: + usecols = (0, 1, 3) elif ncolumns == 5: names = ('claimed_id', 'model_label', 'real_id', 'test_label', 'score') @@ -316,12 +323,14 @@ def load_score(filename, ncolumns=None): 2: convertfunc, 3: convertfunc, 4: float} + if minimal: + usecols = (0, 2, 4) else: raise ValueError("ncolumns of 4 and 5 are supported only.") score_lines = numpy.genfromtxt( open_file(filename, mode='rb'), dtype=None, names=names, - converters=converters, invalid_raise=True) + converters=converters, invalid_raise=True, usecols=usecols, **kwargs) new_dtype = [] for name in score_lines.dtype.names[:-1]: new_dtype.append((name, str(score_lines.dtype[name]).replace('S', 'U'))) @@ -342,6 +351,13 @@ def get_negatives_positives(score_lines): return (negatives, positives) +def get_negatives_positives_from_file(filename, **kwargs): + 
"""Loads the scores first efficiently and then calls + get_negatives_positives""" + score_lines = load_score(filename, minimal=True, **kwargs) + return get_negatives_positives(score_lines) + + def get_negatives_positives_all(score_lines_list): """Take a list of outputs of load_score and return stacked negatives and positives. diff --git a/bob/measure/test_io.py b/bob/measure/test_io.py index 0e6b829a1abc5303bd47465a868efa1c0122373c..ad62468bb1f99fb7748633644705b60aae01a25d 100644 --- a/bob/measure/test_io.py +++ b/bob/measure/test_io.py @@ -60,6 +60,12 @@ def test_load_score(): for name in normal_scores.dtype.names: assert all(normal_scores[name] == compressed_scores[name]) + # test minimal loading + minimal_scores = bob.measure.load.load_score(normal_score_file, minimal=True) + assert len(minimal_scores) == 910 + assert len(minimal_scores.dtype) == 3 + assert minimal_scores.dtype.names == ('claimed_id', 'real_id', 'score') + def test_dump_score(): # This function tests the IO functionality of dumping score files