Commit 9c3872d9 authored by Manuel Günther

Improved memory efficiency by using generators to load score files.

parent 62104737
...@@ -51,7 +51,7 @@ def four_column(filename): ...@@ -51,7 +51,7 @@ def four_column(filename):
Verifies that all fields are correctly placed and contain valid fields. Verifies that all fields are correctly placed and contain valid fields.
Returns a python list of tuples containing the following fields: Returns a python generator of tuples containing the following fields:
[0] [0]
claimed identity (string) claimed identity (string)
...@@ -63,7 +63,6 @@ def four_column(filename): ...@@ -63,7 +63,6 @@ def four_column(filename):
score (float) score (float)
""" """
retval = []
for i, l in enumerate(open_file(filename)): for i, l in enumerate(open_file(filename)):
if isinstance(l, bytes): l = l.decode('utf-8') if isinstance(l, bytes): l = l.decode('utf-8')
s = l.strip() s = l.strip()
...@@ -73,12 +72,10 @@ def four_column(filename): ...@@ -73,12 +72,10 @@ def four_column(filename):
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l)) raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try: try:
score = float(field[3]) score = float(field[3])
t = (field[0], field[1], field[2], score)
retval.append(t)
except: except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l)) raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], score)
return retval
def split_four_column(filename): def split_four_column(filename):
"""Loads a score set from a single file to memory and splits the scores """Loads a score set from a single file to memory and splits the scores
...@@ -92,21 +89,15 @@ def split_four_column(filename): ...@@ -92,21 +89,15 @@ def split_four_column(filename):
arrays of float64. arrays of float64.
""" """
# read four column list
scores_list = four_column(filename)
# split in positives and negatives # split in positives and negatives
neg = [] neg = []
pos = [] pos = []
for (client_id, probe_id, _, score_str) in scores_list: # read four column list line by line
try: for (client_id, probe_id, _, score) in four_column(filename):
score = float(score_str) if client_id == probe_id:
if client_id == probe_id: pos.append(score)
pos.append(score) else:
else: neg.append(score)
neg.append(score)
except:
raise SyntaxError('Cannot convert score "%s" to float' % score_str)
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64)) return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
...@@ -121,12 +112,11 @@ def cmc_four_column(filename): ...@@ -121,12 +112,11 @@ def cmc_four_column(filename):
The result of this function can directly be passed to, e.g., the bob.measure.cmc function. The result of this function can directly be passed to, e.g., the bob.measure.cmc function.
""" """
# read four column list
all_list = four_column(filename)
# extract positives and negatives # extract positives and negatives
pos_dict = {} pos_dict = {}
neg_dict = {} neg_dict = {}
for (client_id, probe_id, probe_name, score_str) in all_list: # read four column list
for (client_id, probe_id, probe_name, score_str) in four_column(filename):
try: try:
score = float(score_str) score = float(score_str)
# check in which dict we have to put the score # check in which dict we have to put the score
...@@ -163,7 +153,7 @@ def five_column(filename): ...@@ -163,7 +153,7 @@ def five_column(filename):
Verifies that all fields are correctly placed and contain valid fields. Verifies that all fields are correctly placed and contain valid fields.
Returns a python list of tuples containing the following fields: Returns a python generator of tuples containing the following fields:
[0] [0]
claimed identity (string) claimed identity (string)
...@@ -177,7 +167,6 @@ def five_column(filename): ...@@ -177,7 +167,6 @@ def five_column(filename):
score (float) score (float)
""" """
retval = []
for i, l in enumerate(open_file(filename)): for i, l in enumerate(open_file(filename)):
s = l.strip() s = l.strip()
if len(s) == 0 or s[0] == '#': continue #empty or comment if len(s) == 0 or s[0] == '#': continue #empty or comment
...@@ -186,12 +175,9 @@ def five_column(filename): ...@@ -186,12 +175,9 @@ def five_column(filename):
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l)) raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try: try:
score = float(field[4]) score = float(field[4])
t = (field[0], field[1], field[2], field[3], score)
retval.append(t)
except: except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l)) raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
return retval
def split_five_column(filename): def split_five_column(filename):
"""Loads a score set from a single file to memory and splits the scores """Loads a score set from a single file to memory and splits the scores
...@@ -205,21 +191,15 @@ def split_five_column(filename): ...@@ -205,21 +191,15 @@ def split_five_column(filename):
arrays of float64. arrays of float64.
""" """
# read five column list
scores_list = five_column(filename)
# split in positives and negatives # split in positives and negatives
neg = [] neg = []
pos = [] pos = []
for (client_id, _, probe_id, _, score_str) in scores_list: # read five column list
try: for (client_id, _, probe_id, _, score) in five_column(filename):
score = float(score_str) if client_id == probe_id:
if client_id == probe_id: pos.append(score)
pos.append(score) else:
else: neg.append(score)
neg.append(score)
except:
raise SyntaxError('Cannot convert score "%s" to float' % score_str)
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64)) return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
...@@ -234,26 +214,21 @@ def cmc_five_column(filename): ...@@ -234,26 +214,21 @@ def cmc_five_column(filename):
The result of this function can directly be passed to, e.g., the bob.measure.cmc function. The result of this function can directly be passed to, e.g., the bob.measure.cmc function.
""" """
# read four column list # extract positives and negatives
all_list = five_column(filename)
pos_dict = {} pos_dict = {}
neg_dict = {} neg_dict = {}
for (client_id, _, probe_id, probe_name, score_str) in all_list: # read four column list
try: for (client_id, _, probe_id, probe_name, score) in five_column(filename):
score = float(score_str) # check in which dict we have to put the score
# check in which dict we have to put the score if client_id == probe_id:
if client_id == probe_id: correct_dict = pos_dict
correct_dict = pos_dict else:
else: correct_dict = neg_dict
correct_dict = neg_dict # append score
# append score if probe_name in correct_dict:
if probe_name in correct_dict: correct_dict[probe_name].append(score)
correct_dict[probe_name].append(score) else:
else: correct_dict[probe_name] = [score]
correct_dict[probe_name] = [score]
except:
raise SyntaxError('Cannot convert score "%s" to float' % score_str)
# convert to lists of tuples of ndarrays # convert to lists of tuples of ndarrays
retval = [] retval = []
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment