diff --git a/bob/measure/load.py b/bob/measure/load.py index 3cbc4300e55db2b2a85d8522f39f6a2540fdf8de..07a6329172f24692b64abf78335cf2153930b185 100644 --- a/bob/measure/load.py +++ b/bob/measure/load.py @@ -6,8 +6,10 @@ """ import numpy +import csv import tarfile import os +import sys import logging logger = logging.getLogger('bob.measure') @@ -78,7 +80,7 @@ def four_column(filename): opened with :py:func:`open_file` containing the scores. - Returns: + Yields: str: The claimed identity -- the client name of the model that was used in the comparison @@ -91,19 +93,8 @@ def four_column(filename): float: The result of the comparison of the model and the probe """ + return _iterate_score_file(filename) - for i, l in enumerate(open_file(filename)): - if isinstance(l, bytes): l = l.decode('utf-8') - s = l.strip() - if len(s) == 0 or s[0] == '#': continue #empty or comment - field = [k.strip() for k in s.split()] - if len(field) < 4: - raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l)) - try: - score = float(field[3]) - except: - raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l)) - yield (field[0], field[1], field[2], score) def split_four_column(filename): @@ -135,8 +126,8 @@ def split_four_column(filename): """ - score_lines = load_score(filename, 4) - return get_negatives_positives(score_lines) + score_lines = four_column(filename) + return _split_scores(score_lines, 1) def cmc_four_column(filename): @@ -168,22 +159,8 @@ def cmc_four_column(filename): """ - # extract positives and negatives - pos_dict = {} - neg_dict = {} - # read four column list - for (client_id, probe_id, probe_name, score) in four_column(filename): - # check in which dict we have to put the score - correct_dict = pos_dict if client_id == probe_id else neg_dict - - # append score - if probe_name in correct_dict: - correct_dict[probe_name].append(score) - else: - correct_dict[probe_name] = [score] - - # convert that into the desired format - return _convert_cmc_scores(neg_dict, pos_dict) + score_lines = four_column(filename) + return _split_cmc_scores(score_lines, 1) def five_column(filename): @@ -205,7 +182,7 @@ def five_column(filename): opened with :py:func:`open_file` containing the scores. - Returns: + Yields: str: The claimed identity -- the client name of the model that was used in the comparison @@ -221,18 +198,7 @@ def five_column(filename): """ - for i, l in enumerate(open_file(filename)): - if isinstance(l, bytes): l = l.decode('utf-8') - s = l.strip() - if len(s) == 0 or s[0] == '#': continue #empty or comment - field = [k.strip() for k in s.split()] - if len(field) < 5: - raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l)) - try: - score = float(field[4]) - except: - raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l)) - yield (field[0], field[1], field[2], field[3], score) + return _iterate_score_file(filename) def split_five_column(filename): @@ -264,8 +230,8 @@ def split_five_column(filename): """ - score_lines = load_score(filename, 5) - return get_negatives_positives(score_lines) + score_lines = four_column(filename) + return _split_scores(score_lines, 2) def cmc_five_column(filename): @@ -294,22 +260,9 @@ def cmc_five_column(filename): ``positive`` scores for one probe of the database. """ - # extract positives and negatives - pos_dict = {} - neg_dict = {} - # read four column list - for (client_id, _, probe_id, probe_name, score) in five_column(filename): - # check in which dict we have to put the score - correct_dict = pos_dict if client_id == probe_id else neg_dict + score_lines = four_column(filename) + return _split_cmc_scores(score_lines, 2) - # append score - if probe_name in correct_dict: - correct_dict[probe_name].append(score) - else: - correct_dict[probe_name] = [score] - - # convert that into the desired format - return _convert_cmc_scores(neg_dict, pos_dict) def load_score(filename, ncolumns=None): @@ -425,11 +378,48 @@ def dump_score(filename, score_lines): numpy.savetxt(filename, score_lines, fmt=fmt) -def _convert_cmc_scores(neg_dict, pos_dict): - """Converts the negative and positive scores read with - :py:func:`cmc_four_column` or :py:func:`cmc_four_column` into a format that - is handled by the :py:func:`bob.measure.cmc` and similar functions. +def _iterate_score_file(filename): + """Opens the score file for reading and yields the score file line by line in a tuple/list. + + The last element of the line (which is the score) will be transformed to float, the other elements will be str + """ + opened = open_file(filename, 'rb') + if sys.version_info.major > 2: + import io + opened = io.TextIOWrapper(opened, newline="") + + reader = csv.reader(opened, delimiter=' ') + for splits in reader: + splits[-1] = float(splits[-1]) + yield splits + + +def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1): + """Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives. """ + positives, negatives = [], [] + for line in score_lines: + which = positives if line[claimed_id_index] == line[real_id_index] else negatives + which.append(line[score_index]) + + return (numpy.array(negatives), numpy.array(positives)) + +def _split_cmc_scores(score_lines, real_id_index, probe_name_index = None, claimed_id_index = 0, score_index = -1): + """Takes the output of :py:func:`four_column` or :py:func:`five_column` and return cmc scores. + """ + if probe_name_index is None: + probe_name_index = real_id_index + 1 + # extract positives and negatives + pos_dict = {} + neg_dict = {} + # read four column list + for line in score_lines: + which = pos_dict if line[claimed_id_index] == line[real_id_index] else neg_dict + probe_name = line[probe_name_index] + # append score + if probe_name not in which: + which[probe_name] = [] + which[probe_name].append(line[score_index]) # convert to lists of tuples of ndarrays (or None) probe_names = sorted(set(neg_dict.keys()).union(set(pos_dict.keys()))) diff --git a/bob/measure/test_error.py b/bob/measure/test_error.py index f369ddbb279d74ac39df2f686d9231119f351aaf..fd00788d96253d0757858c186b476a07e06e27ca 100644 --- a/bob/measure/test_error.py +++ b/bob/measure/test_error.py @@ -216,10 +216,10 @@ def test_plots(): # EPC curve, you need to have a development and a test set. We will split, # by the middle, the negatives and positives sample we have, just for the # sake of testing - dev_negatives = negatives[:(negatives.shape[0]/2)] - test_negatives = negatives[(negatives.shape[0]/2):] - dev_positives = positives[:(positives.shape[0]/2)] - test_positives = positives[(positives.shape[0]/2):] + dev_negatives = negatives[:(negatives.shape[0]//2)] + test_negatives = negatives[(negatives.shape[0]//2):] + dev_positives = positives[:(positives.shape[0]//2)] + test_positives = positives[(positives.shape[0]//2):] xy = epc(dev_negatives, dev_positives, test_negatives, test_positives, 100) # uncomment the next line to save a reference value diff --git a/bob/measure/test_io.py b/bob/measure/test_io.py index 12a50b5c9f926a7b7e5a2921000bb6a5bf828af1..0e6b829a1abc5303bd47465a868efa1c0122373c 100644 --- a/bob/measure/test_io.py +++ b/bob/measure/test_io.py @@ -20,7 +20,7 @@ def test_load_scores(): load_functions = {'4col' : bob.measure.load.four_column, '5col' : bob.measure.load.five_column} cols = {'4col' : 4, '5col' : 5} - for variant in ('4col', '5col'): + for variant in cols: # read score file in normal way normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') normal_scores = list(load_functions[variant](normal_score_file)) @@ -43,7 +43,7 @@ def test_load_score(): scores = [] cols = {'4col' : 4, '5col' : 5} - for variant in ('4col', '5col'): + for variant in cols: # read score file in normal way normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') normal_scores = bob.measure.load.load_score(normal_score_file, cols[variant]) @@ -67,7 +67,7 @@ def test_dump_score(): scores = [] cols = {'4col' : 4, '5col' : 5} - for variant in ('4col', '5col'): + for variant in cols: # read score file normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') normal_scores = bob.measure.load.load_score(normal_score_file, cols[variant]) @@ -177,7 +177,7 @@ def test_from_openbr(): load_functions = {'4col' : bob.measure.load.four_column, '5col' : bob.measure.load.five_column} try: - for variant in ('4col', '5col'): + for variant in load_functions: # first, do not define keyword arguments -- let the file get the model and probe ids being created automatically bob.measure.openbr.write_score_file(matrix_file, mask_file, score_file, score_file_format="%sumn"%variant) assert os.path.exists(score_file) @@ -211,10 +211,10 @@ def test_from_openbr(): assert columns[i][j] == reference[i][j], str(columns[i]) + " != " + str(reference[i]) # check that the score is close (OpenBR write scores in float32 precision only) assert abs(columns[i][-1] - numpy.float32(reference[i][-1])) <= 1e-8, str(columns[i][-1]) + " != " + str(reference[i][-1]) - + #assert numpy.isclose(columns[i][-1], reference[i][-1], atol = 1e-3, rtol=1e-8), str(columns[i][-1]) + " != " + str(reference[i][-1]) assert numpy.allclose(columns[i][-1], reference[i][-1], atol = 1e-3, rtol=1e-8), str(columns[i][-1]) + " != " + str(reference[i][-1]) - + finally: shutil.rmtree(temp_dir)