Commit 2a607a31 authored by Amir MOHAMMADI

Merge branch 'minimal_load' into 'master'

Add minimal load capability and fix the documentation warnings.

See merge request !25
parents c3c1ecbc 916220b2
Pipeline #6041 passed with stages
in 3 minutes and 8 seconds
...@@ -96,7 +96,6 @@ def four_column(filename): ...@@ -96,7 +96,6 @@ def four_column(filename):
return _iterate_score_file(filename) return _iterate_score_file(filename)
def split_four_column(filename): def split_four_column(filename):
"""Loads a score set from a single file and splits the scores """Loads a score set from a single file and splits the scores
...@@ -116,13 +115,13 @@ def split_four_column(filename): ...@@ -116,13 +115,13 @@ def split_four_column(filename):
Returns: Returns:
negatives (array): 1D float array containing the list of scores, for which array: negatives, 1D float array containing the list of scores, for which
the ``claimed_id`` and the ``real_id`` differed (see the ``claimed_id`` and the ``real_id`` are different
:py:func:`four_column`) (see :py:func:`four_column`)
positivies (array): 1D float array containing the list of scores, for which array: positives, 1D float array containing the list of scores, for which
the ``claimed_id`` and the ``real_id`` are identical (see the ``claimed_id`` and the ``real_id`` are identical
:py:func:`four_column`) (see :py:func:`four_column`)
""" """
...@@ -220,13 +219,13 @@ def split_five_column(filename): ...@@ -220,13 +219,13 @@ def split_five_column(filename):
Returns: Returns:
negatives (array): 1D float array containing the list of scores, for which array: negatives, 1D float array containing the list of scores, for which
the ``claimed_id`` and the ``real_id`` differed (see the ``claimed_id`` and the ``real_id`` are different
:py:func:`four_column`) (see :py:func:`four_column`)
positivies (array): 1D float array containing the list of scores, for which array: positives, 1D float array containing the list of scores, for which
the ``claimed_id`` and the ``real_id`` are identical (see the ``claimed_id`` and the ``real_id`` are identical
:py:func:`four_column`) (see :py:func:`four_column`)
""" """
...@@ -264,8 +263,7 @@ def cmc_five_column(filename): ...@@ -264,8 +263,7 @@ def cmc_five_column(filename):
return _split_cmc_scores(score_lines, 2) return _split_cmc_scores(score_lines, 2)
def load_score(filename, ncolumns=None, minimal=False, **kwargs):
def load_score(filename, ncolumns=None):
"""Load scores using numpy.loadtxt and return the data as a numpy array. """Load scores using numpy.loadtxt and return the data as a numpy array.
Parameters: Parameters:
...@@ -277,6 +275,11 @@ def load_score(filename, ncolumns=None): ...@@ -277,6 +275,11 @@ def load_score(filename, ncolumns=None):
specifying the number of columns in the score file. If None is provided, specifying the number of columns in the score file. If None is provided,
the number of columns will be guessed. the number of columns will be guessed.
minimal (:py:class:`bool`, optional): If True, only loads ``claimed_id``, ``real_id``,
and ``scores``.
**kwargs: Keyword arguments passed to :py:func:`numpy.genfromtxt`
Returns: Returns:
...@@ -300,6 +303,7 @@ def load_score(filename, ncolumns=None): ...@@ -300,6 +303,7 @@ def load_score(filename, ncolumns=None):
finally: finally:
f.close() f.close()
usecols = kwargs.pop('usecols', None)
if ncolumns == 4: if ncolumns == 4:
names = ('claimed_id', 'real_id', 'test_label', 'score') names = ('claimed_id', 'real_id', 'test_label', 'score')
converters = { converters = {
...@@ -307,6 +311,8 @@ def load_score(filename, ncolumns=None): ...@@ -307,6 +311,8 @@ def load_score(filename, ncolumns=None):
1: convertfunc, 1: convertfunc,
2: convertfunc, 2: convertfunc,
3: float} 3: float}
if minimal:
usecols = (0, 1, 3)
elif ncolumns == 5: elif ncolumns == 5:
names = ('claimed_id', 'model_label', 'real_id', 'test_label', 'score') names = ('claimed_id', 'model_label', 'real_id', 'test_label', 'score')
...@@ -316,12 +322,14 @@ def load_score(filename, ncolumns=None): ...@@ -316,12 +322,14 @@ def load_score(filename, ncolumns=None):
2: convertfunc, 2: convertfunc,
3: convertfunc, 3: convertfunc,
4: float} 4: float}
if minimal:
usecols = (0, 2, 4)
else: else:
raise ValueError("ncolumns of 4 and 5 are supported only.") raise ValueError("ncolumns of 4 and 5 are supported only.")
score_lines = numpy.genfromtxt( score_lines = numpy.genfromtxt(
open_file(filename, mode='rb'), dtype=None, names=names, open_file(filename, mode='rb'), dtype=None, names=names,
converters=converters, invalid_raise=True) converters=converters, invalid_raise=True, usecols=usecols, **kwargs)
new_dtype = [] new_dtype = []
for name in score_lines.dtype.names[:-1]: for name in score_lines.dtype.names[:-1]:
new_dtype.append((name, str(score_lines.dtype[name]).replace('S', 'U'))) new_dtype.append((name, str(score_lines.dtype[name]).replace('S', 'U')))
...@@ -342,6 +350,13 @@ def get_negatives_positives(score_lines): ...@@ -342,6 +350,13 @@ def get_negatives_positives(score_lines):
return (negatives, positives) return (negatives, positives)
def get_negatives_positives_from_file(filename, **kwargs):
    """Load a score file in minimal mode and split it into negatives and
    positives.

    The file is read with :py:func:`load_score` using ``minimal=True`` and
    the result is handed directly to :py:func:`get_negatives_positives`.

    Parameters:

      filename: The score file to read; passed unchanged to
        :py:func:`load_score`.

      **kwargs: Extra keyword arguments forwarded to :py:func:`load_score`.

    Returns:

      Whatever :py:func:`get_negatives_positives` returns for the loaded
      score lines.
    """
    return get_negatives_positives(
        load_score(filename, minimal=True, **kwargs))
def get_negatives_positives_all(score_lines_list): def get_negatives_positives_all(score_lines_list):
"""Take a list of outputs of load_score and return stacked negatives and """Take a list of outputs of load_score and return stacked negatives and
positives. positives.
......
...@@ -60,6 +60,12 @@ def test_load_score(): ...@@ -60,6 +60,12 @@ def test_load_score():
for name in normal_scores.dtype.names: for name in normal_scores.dtype.names:
assert all(normal_scores[name] == compressed_scores[name]) assert all(normal_scores[name] == compressed_scores[name])
# test minimal loading
minimal_scores = bob.measure.load.load_score(normal_score_file, minimal=True)
assert len(minimal_scores) == 910
assert len(minimal_scores.dtype) == 3
assert minimal_scores.dtype.names == ('claimed_id', 'real_id', 'score')
def test_dump_score(): def test_dump_score():
# This function tests the IO functionality of dumping score files # This function tests the IO functionality of dumping score files
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment