Commit 0c19f066 authored by Manuel Günther's avatar Manuel Günther

Rewrote split_four/five_column to use four/five_column; reimplemented four/five_column

parent 7ed0fdca
Pipeline #5135 failed with stages
in 5 minutes and 50 seconds
...@@ -79,7 +79,7 @@ def four_column(filename): ...@@ -79,7 +79,7 @@ def four_column(filename):
opened with :py:func:`open_file` containing the scores. opened with :py:func:`open_file` containing the scores.
Returns: Yields:
str: The claimed identity -- the client name of the model that was used in str: The claimed identity -- the client name of the model that was used in
the comparison the comparison
...@@ -93,18 +93,10 @@ def four_column(filename): ...@@ -93,18 +93,10 @@ def four_column(filename):
""" """
for i, l in enumerate(open_file(filename)): reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
if isinstance(l, bytes): l = l.decode('utf-8') for splits in reader:
s = l.strip() splits[-1] = float(splits[-1])
if len(s) == 0 or s[0] == '#': continue #empty or comment yield splits
field = [k.strip() for k in s.split()]
if len(field) < 4:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[3])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], score)
def split_four_column(filename): def split_four_column(filename):
...@@ -136,8 +128,8 @@ def split_four_column(filename): ...@@ -136,8 +128,8 @@ def split_four_column(filename):
""" """
score_lines = load_score_with_generator(filename, 4) score_lines = four_column(filename)
return get_negatives_positives_from_generator(score_lines) return _split_scores(score_lines, 1)
def cmc_four_column(filename): def cmc_four_column(filename):
...@@ -206,7 +198,7 @@ def five_column(filename): ...@@ -206,7 +198,7 @@ def five_column(filename):
opened with :py:func:`open_file` containing the scores. opened with :py:func:`open_file` containing the scores.
Returns: Yields:
str: The claimed identity -- the client name of the model that was used in str: The claimed identity -- the client name of the model that was used in
the comparison the comparison
...@@ -222,18 +214,10 @@ def five_column(filename): ...@@ -222,18 +214,10 @@ def five_column(filename):
""" """
for i, l in enumerate(open_file(filename)): reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
if isinstance(l, bytes): l = l.decode('utf-8') for splits in reader:
s = l.strip() splits[-1] = float(splits[-1])
if len(s) == 0 or s[0] == '#': continue #empty or comment yield splits
field = [k.strip() for k in s.split()]
if len(field) < 5:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[4])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
def split_five_column(filename): def split_five_column(filename):
...@@ -265,8 +249,8 @@ def split_five_column(filename): ...@@ -265,8 +249,8 @@ def split_five_column(filename):
""" """
score_lines = load_score_with_generator(filename, 5) score_lines = four_column(filename)
return get_negatives_positives_from_generator(score_lines) return _split_scores(score_lines, 2)
def cmc_five_column(filename): def cmc_five_column(filename):
...@@ -313,62 +297,6 @@ def cmc_five_column(filename): ...@@ -313,62 +297,6 @@ def cmc_five_column(filename):
return _convert_cmc_scores(neg_dict, pos_dict) return _convert_cmc_scores(neg_dict, pos_dict)
COLUMNS = {
4 : ('claimed_id', 'real_id', 'test_label', 'score'),
5 : ('claimed_id', 'model_label', 'real_id', 'test_label', 'score')
}
def load_score_with_generator(filename, ncolumns=None):
"""Load scores using :py:class:`csv.DictReader` and yield the scores line by line in a dictionary.
Parameters:
filename (:py:class:`str`, ``file-like``): The file object that will be
opened with :py:func:`open_file` containing the scores.
ncolumns (:py:class:`int`, optional): 4, 5 or None (the default),
specifying the number of columns in the score file. If None is provided,
the number of columns will be guessed.
Yields:
line: A dictionary which contains not only the actual ``score`` but also the
``claimed_id``, ``real_id``, ``test_label`` (and ``['model_label']``)
"""
if ncolumns is None:
f = open_file(filename)
try:
line = f.readline()
ncolumns = len(line.split())
except Exception:
logger.warn('Could not guess the number of columns in file: {}. '
'Assuming 4 column format.'.format(filename))
ncolumns = 4
finally:
f.close()
elif ncolumns not in (4,5):
raise ValueError("ncolumns of 4 and 5 are supported only.")
reader = csv.DictReader(open_file(filename, mode='rb'), fieldnames=COLUMNS[ncolumns], delimiter=' ')
for splits in reader:
splits['score'] = float(splits['score'])
yield splits
def get_negatives_positives_from_generator(score_lines):
"""Take the output of :py:func:`load_score_with_generator` and return negatives and positives. This
function aims to replace split_four_column and split_five_column but takes a
different input. It's up to you to use which one.
"""
positives, negatives = [], []
for line in score_lines:
which = positives if line['claimed_id'] == line['real_id'] else negatives
which.append(line['score'])
return (numpy.array(negatives), numpy.array(positives))
def load_score(filename, ncolumns=None): def load_score(filename, ncolumns=None):
"""Load scores using numpy.loadtxt and return the data as a numpy array. """Load scores using numpy.loadtxt and return the data as a numpy array.
...@@ -482,6 +410,17 @@ def dump_score(filename, score_lines): ...@@ -482,6 +410,17 @@ def dump_score(filename, score_lines):
numpy.savetxt(filename, score_lines, fmt=fmt) numpy.savetxt(filename, score_lines, fmt=fmt)
def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1):
"""Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.
"""
positives, negatives = [], []
for line in score_lines:
which = positives if line[claimed_id_index] == line[real_id_index] else negatives
which.append(line[score_index])
return (numpy.array(negatives), numpy.array(positives))
def _convert_cmc_scores(neg_dict, pos_dict): def _convert_cmc_scores(neg_dict, pos_dict):
"""Converts the negative and positive scores read with """Converts the negative and positive scores read with
:py:func:`cmc_four_column` or :py:func:`cmc_four_column` into a format that :py:func:`cmc_four_column` or :py:func:`cmc_four_column` into a format that
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment