Skip to content
Snippets Groups Projects

Resolve "load_scores extremely memory hungry"

Merged Manuel Günther requested to merge 19-load_scores-extremely-memory-hungry into master
+ 26
30
@@ -6,6 +6,7 @@
@@ -6,6 +6,7 @@
"""
"""
import numpy
import numpy
 
import csv
import tarfile
import tarfile
import os
import os
@@ -78,7 +79,7 @@ def four_column(filename):
@@ -78,7 +79,7 @@ def four_column(filename):
opened with :py:func:`open_file` containing the scores.
opened with :py:func:`open_file` containing the scores.
Returns:
Yields:
str: The claimed identity -- the client name of the model that was used in
str: The claimed identity -- the client name of the model that was used in
the comparison
the comparison
@@ -92,18 +93,10 @@ def four_column(filename):
@@ -92,18 +93,10 @@ def four_column(filename):
"""
"""
for i, l in enumerate(open_file(filename)):
reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
if isinstance(l, bytes): l = l.decode('utf-8')
for splits in reader:
s = l.strip()
splits[-1] = float(splits[-1])
if len(s) == 0 or s[0] == '#': continue #empty or comment
yield splits
field = [k.strip() for k in s.split()]
if len(field) < 4:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[3])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], score)
def split_four_column(filename):
def split_four_column(filename):
@@ -135,8 +128,8 @@ def split_four_column(filename):
@@ -135,8 +128,8 @@ def split_four_column(filename):
"""
"""
score_lines = load_score(filename, 4)
score_lines = four_column(filename)
return get_negatives_positives(score_lines)
return _split_scores(score_lines, 1)
def cmc_four_column(filename):
def cmc_four_column(filename):
@@ -205,7 +198,7 @@ def five_column(filename):
@@ -205,7 +198,7 @@ def five_column(filename):
opened with :py:func:`open_file` containing the scores.
opened with :py:func:`open_file` containing the scores.
Returns:
Yields:
str: The claimed identity -- the client name of the model that was used in
str: The claimed identity -- the client name of the model that was used in
the comparison
the comparison
@@ -221,18 +214,10 @@ def five_column(filename):
@@ -221,18 +214,10 @@ def five_column(filename):
"""
"""
for i, l in enumerate(open_file(filename)):
reader = csv.reader(open_file(filename, mode='rb'), delimiter=' ')
if isinstance(l, bytes): l = l.decode('utf-8')
for splits in reader:
s = l.strip()
splits[-1] = float(splits[-1])
if len(s) == 0 or s[0] == '#': continue #empty or comment
yield splits
field = [k.strip() for k in s.split()]
if len(field) < 5:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[4])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
def split_five_column(filename):
def split_five_column(filename):
@@ -264,8 +249,8 @@ def split_five_column(filename):
@@ -264,8 +249,8 @@ def split_five_column(filename):
"""
"""
score_lines = load_score(filename, 5)
score_lines = four_column(filename)
return get_negatives_positives(score_lines)
return _split_scores(score_lines, 2)
def cmc_five_column(filename):
def cmc_five_column(filename):
@@ -425,6 +410,17 @@ def dump_score(filename, score_lines):
@@ -425,6 +410,17 @@ def dump_score(filename, score_lines):
numpy.savetxt(filename, score_lines, fmt=fmt)
numpy.savetxt(filename, score_lines, fmt=fmt)
 
def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1):
 
"""Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.
 
"""
 
positives, negatives = [], []
 
for line in score_lines:
 
which = positives if line[claimed_id_index] == line[real_id_index] else negatives
 
which.append(line[score_index])
 
 
return (numpy.array(negatives), numpy.array(positives))
 
 
def _convert_cmc_scores(neg_dict, pos_dict):
def _convert_cmc_scores(neg_dict, pos_dict):
"""Converts the negative and positive scores read with
"""Converts the negative and positive scores read with
:py:func:`cmc_four_column` or :py:func:`cmc_five_column` into a format that
:py:func:`cmc_four_column` or :py:func:`cmc_five_column` into a format that
Loading