Manuel Günther · 18ecfb37 · 504a69fc · 0c19f066 · 7ed0fdca · fb8f306c
--- a/bob/measure/load.py

+ 61

− 4

View file @ 7ed0fdca

Open in Web IDE
+++ b/bob/measure/load.py

+ 61

− 4

View file @ 7ed0fdca

Open in Web IDE
 @@ -6,6 +6,7 @@
 """

 import numpy
+import csv
 import tarfile
 import os

 @@ -135,8 +136,8 @@ def split_four_column(filename):

  """

-  score_lines = load_score(filename, 4)
-  return get_negatives_positives(score_lines)
+  score_lines = load_score_with_generator(filename, 4)
+  return get_negatives_positives_from_generator(score_lines)


 def cmc_four_column(filename):
 @@ -264,8 +265,8 @@ def split_five_column(filename):

  """

-  score_lines = load_score(filename, 5)
-  return get_negatives_positives(score_lines)
+  score_lines = load_score_with_generator(filename, 5)
+  return get_negatives_positives_from_generator(score_lines)


 def cmc_five_column(filename):
 @@ -312,6 +313,62 @@ def cmc_five_column(filename):
  return _convert_cmc_scores(neg_dict, pos_dict)


+COLUMNS = {
+  4 : ('claimed_id', 'real_id', 'test_label', 'score'),
+  5 : ('claimed_id', 'model_label', 'real_id', 'test_label', 'score')
+}
+
+def load_score_with_generator(filename, ncolumns=None):
+  """Load scores using :py:class:`csv.DictReader` and yield the scores line by line in a dictionary.
+
+  Parameters:
+
+    filename (:py:class:`str`, ``file-like``): The file object that will be
+      opened with :py:func:`open_file` containing the scores.
+
+    ncolumns (:py:class:`int`, optional): 4, 5 or None (the default),
+      specifying the number of columns in the score file. If None is provided,
+      the number of columns will be guessed.
+
+
+  Yields:
+
+    line: A dictionary which contains not only the actual ``score`` but also the
+    ``claimed_id``, ``real_id``, ``test_label`` (and ``['model_label']``)
+  """
+
+  if ncolumns is None:
+    f = open_file(filename)
+    try:
+      line = f.readline()
+      ncolumns = len(line.split())
+    except Exception:
+      logger.warn('Could not guess the number of columns in file: {}. '
+                  'Assuming 4 column format.'.format(filename))
+      ncolumns = 4
+    finally:
+      f.close()
+  elif ncolumns not in (4,5):
+    raise ValueError("ncolumns of 4 and 5 are supported only.")
+
+  reader = csv.DictReader(open_file(filename, mode='rb'), fieldnames=COLUMNS[ncolumns], delimiter=' ')
+  for splits in reader:
+    splits['score'] = float(splits['score'])
+    yield splits
+
+def get_negatives_positives_from_generator(score_lines):
+  """Take the output of :py:func:`load_score_with_generator` and return negatives and positives.  This
+  function aims to replace split_four_column and split_five_column but takes a
+  different input. It's up to you to use which one.
+  """
+  positives, negatives = [], []
+  for line in score_lines:
+    which = positives if line['claimed_id'] == line['real_id'] else negatives
+    which.append(line['score'])
+
+  return (numpy.array(negatives), numpy.array(positives))
+
+
 def load_score(filename, ncolumns=None):
  """Load scores using numpy.loadtxt and return the data as a numpy array.