Commit 0c19f066 authored by Manuel Günther's avatar Manuel Günther

Rewrote split_four/five_column to use four/five_column; reimplemented four/five_column

parent 7ed0fdca
Pipeline #5135 failed with stages
in 5 minutes and 50 seconds
......@@ -79,7 +79,7 @@ def four_column(filename):
opened with :py:func:`open_file` containing the scores.
Returns:
Yields:
str: The claimed identity -- the client name of the model that was used in
the comparison
......@@ -93,18 +93,10 @@ def four_column(filename):
"""
for i, l in enumerate(open_file(filename)):
if isinstance(l, bytes): l = l.decode('utf-8')
s = l.strip()
if len(s) == 0 or s[0] == '#': continue #empty or comment
field = [k.strip() for k in s.split()]
if len(field) < 4:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[3])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], score)
reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
for splits in reader:
splits[-1] = float(splits[-1])
yield splits
def split_four_column(filename):
......@@ -136,8 +128,8 @@ def split_four_column(filename):
"""
score_lines = load_score_with_generator(filename, 4)
return get_negatives_positives_from_generator(score_lines)
score_lines = four_column(filename)
return _split_scores(score_lines, 1)
def cmc_four_column(filename):
......@@ -206,7 +198,7 @@ def five_column(filename):
opened with :py:func:`open_file` containing the scores.
Returns:
Yields:
str: The claimed identity -- the client name of the model that was used in
the comparison
......@@ -222,18 +214,10 @@ def five_column(filename):
"""
for i, l in enumerate(open_file(filename)):
if isinstance(l, bytes): l = l.decode('utf-8')
s = l.strip()
if len(s) == 0 or s[0] == '#': continue #empty or comment
field = [k.strip() for k in s.split()]
if len(field) < 5:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[4])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
for splits in reader:
splits[-1] = float(splits[-1])
yield splits
def split_five_column(filename):
......@@ -265,8 +249,8 @@ def split_five_column(filename):
"""
score_lines = load_score_with_generator(filename, 5)
return get_negatives_positives_from_generator(score_lines)
score_lines = four_column(filename)
return _split_scores(score_lines, 2)
def cmc_five_column(filename):
......@@ -313,62 +297,6 @@ def cmc_five_column(filename):
return _convert_cmc_scores(neg_dict, pos_dict)
COLUMNS = {
4 : ('claimed_id', 'real_id', 'test_label', 'score'),
5 : ('claimed_id', 'model_label', 'real_id', 'test_label', 'score')
}
def load_score_with_generator(filename, ncolumns=None):
"""Load scores using :py:class:`csv.DictReader` and yield the scores line by line in a dictionary.
Parameters:
filename (:py:class:`str`, ``file-like``): The file object that will be
opened with :py:func:`open_file` containing the scores.
ncolumns (:py:class:`int`, optional): 4, 5 or None (the default),
specifying the number of columns in the score file. If None is provided,
the number of columns will be guessed.
Yields:
line: A dictionary which contains not only the actual ``score`` but also the
``claimed_id``, ``real_id``, ``test_label`` (and ``['model_label']``)
"""
if ncolumns is None:
f = open_file(filename)
try:
line = f.readline()
ncolumns = len(line.split())
except Exception:
logger.warn('Could not guess the number of columns in file: {}. '
'Assuming 4 column format.'.format(filename))
ncolumns = 4
finally:
f.close()
elif ncolumns not in (4,5):
raise ValueError("ncolumns of 4 and 5 are supported only.")
reader = csv.DictReader(open_file(filename, mode='rb'), fieldnames=COLUMNS[ncolumns], delimiter=' ')
for splits in reader:
splits['score'] = float(splits['score'])
yield splits
def get_negatives_positives_from_generator(score_lines):
"""Take the output of :py:func:`load_score_with_generator` and return negatives and positives. This
function aims to replace split_four_column and split_five_column but takes a
different input. It's up to you to use which one.
"""
positives, negatives = [], []
for line in score_lines:
which = positives if line['claimed_id'] == line['real_id'] else negatives
which.append(line['score'])
return (numpy.array(negatives), numpy.array(positives))
def load_score(filename, ncolumns=None):
"""Load scores using numpy.loadtxt and return the data as a numpy array.
......@@ -482,6 +410,17 @@ def dump_score(filename, score_lines):
numpy.savetxt(filename, score_lines, fmt=fmt)
def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1):
"""Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.
"""
positives, negatives = [], []
for line in score_lines:
which = positives if line[claimed_id_index] == line[real_id_index] else negatives
which.append(line[score_index])
return (numpy.array(negatives), numpy.array(positives))
def _convert_cmc_scores(neg_dict, pos_dict):
"""Converts the negative and positive scores read with
:py:func:`cmc_four_column` or :py:func:`cmc_four_column` into a format that
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment