Manuel Günther · 18ecfb37 · 504a69fc · 0c19f066 · 7ed0fdca · fb8f306c
--- a/bob/measure/load.py

+ 26

− 30

View file @ 0c19f066

Open in Web IDE
+++ b/bob/measure/load.py

+ 26

− 30

View file @ 0c19f066

Open in Web IDE
 @@ -6,6 +6,7 @@
 @@ -6,6 +6,7 @@
 """
 import numpy
+import csv
 import tarfile
 import os
 @@ -78,7 +79,7 @@ def four_column(filename):
 @@ -78,7 +79,7 @@ def four_column(filename):
      opened with :py:func:`open_file` containing the scores.
-  Returns:
+  Yields:
    str: The claimed identity -- the client name of the model that was used in
    the comparison
 @@ -92,18 +93,10 @@ def four_column(filename):
 @@ -92,18 +93,10 @@ def four_column(filename):
  """
-  for i, l in enumerate(open_file(filename)):
+  reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
-    if isinstance(l, bytes): l = l.decode('utf-8')
+  for splits in reader:
-    s = l.strip()
+    splits[-1] = float(splits[-1])
-    if len(s) == 0 or s[0] == '#': continue #empty or comment
+    yield splits
-    field = [k.strip() for k in s.split()]
-    if len(field) < 4:
-      raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
-    try:
-      score = float(field[3])
-    except:
-      raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
-    yield (field[0], field[1], field[2], score)
 def split_four_column(filename):
 @@ -135,8 +128,8 @@ def split_four_column(filename):
 @@ -135,8 +128,8 @@ def split_four_column(filename):
  """
-  score_lines = load_score(filename, 4)
+  score_lines = four_column(filename)
-  return get_negatives_positives(score_lines)
+  return _split_scores(score_lines, 1)
 def cmc_four_column(filename):
 @@ -205,7 +198,7 @@ def five_column(filename):
 @@ -205,7 +198,7 @@ def five_column(filename):
      opened with :py:func:`open_file` containing the scores.
-  Returns:
+  Yields:
    str: The claimed identity -- the client name of the model that was used in
    the comparison
 @@ -221,18 +214,10 @@ def five_column(filename):
 @@ -221,18 +214,10 @@ def five_column(filename):
  """
-  for i, l in enumerate(open_file(filename)):
+  reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
-    if isinstance(l, bytes): l = l.decode('utf-8')
+  for splits in reader:
-    s = l.strip()
+    splits[-1] = float(splits[-1])
-    if len(s) == 0 or s[0] == '#': continue #empty or comment
+    yield splits
-    field = [k.strip() for k in s.split()]
-    if len(field) < 5:
-      raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
-    try:
-      score = float(field[4])
-    except:
-      raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
-    yield (field[0], field[1], field[2], field[3], score)
 def split_five_column(filename):
 @@ -264,8 +249,8 @@ def split_five_column(filename):
 @@ -264,8 +249,8 @@ def split_five_column(filename):
  """
-  score_lines = load_score(filename, 5)
+  score_lines = four_column(filename)
-  return get_negatives_positives(score_lines)
+  return _split_scores(score_lines, 2)
 def cmc_five_column(filename):
 @@ -425,6 +410,17 @@ def dump_score(filename, score_lines):
 @@ -425,6 +410,17 @@ def dump_score(filename, score_lines):
  numpy.savetxt(filename, score_lines, fmt=fmt)
+def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1):
+  """Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.
+
+  A line is counted as a positive when the field at ``claimed_id_index``
+  equals the field at ``real_id_index``; every other line is a negative.
+
+  Parameters:
+
+    score_lines (iterable): Indexable records, one per score line.
+
+    real_id_index (int): Position of the real identity inside each record.
+    The callers in this module pass ``1`` for four-column data and ``2`` for
+    five-column data.
+
+    claimed_id_index (int): Position of the claimed identity inside each
+    record; defaults to the first field.
+
+    score_index (int): Position of the score inside each record; defaults to
+    the last field.
+
+  Returns:
+
+    tuple: ``(negatives, positives)`` -- note the order, negatives first --
+    each built with :py:func:`numpy.array` from the collected scores.
+  """
+  positives, negatives = [], []
+  for line in score_lines:
+    # pick the destination list first, then append the score to it
+    which = positives if line[claimed_id_index] == line[real_id_index] else negatives
+    which.append(line[score_index])
+  # negatives are returned before positives
+  return (numpy.array(negatives), numpy.array(positives))
 def _convert_cmc_scores(neg_dict, pos_dict):
 """Converts the negative and positive scores read with
 :py:func:`cmc_four_column` or :py:func:`cmc_five_column` into a format that