Skip to content
Snippets Groups Projects

Resolve "load_scores extremely memory hungry"

Merged Manuel Günther requested to merge 19-load_scores-extremely-memory-hungry into master
Files
3
+ 37
30
@@ -6,8 +6,10 @@
"""
import numpy
import csv
import tarfile
import os
import sys
import logging
logger = logging.getLogger('bob.measure')
@@ -78,7 +80,7 @@ def four_column(filename):
opened with :py:func:`open_file` containing the scores.
Returns:
Yields:
str: The claimed identity -- the client name of the model that was used in
the comparison
@@ -92,18 +94,15 @@ def four_column(filename):
"""
for i, l in enumerate(open_file(filename)):
if isinstance(l, bytes): l = l.decode('utf-8')
s = l.strip()
if len(s) == 0 or s[0] == '#': continue #empty or comment
field = [k.strip() for k in s.split()]
if len(field) < 4:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[3])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], score)
opened = open_file(filename, 'rb')
if sys.version_info.major > 2:
import io
opened = io.TextIOWrapper(opened, newline="")
reader = csv.reader(opened, delimiter=' ')
for splits in reader:
splits[-1] = float(splits[-1])
yield splits
def split_four_column(filename):
@@ -135,8 +134,8 @@ def split_four_column(filename):
"""
score_lines = load_score(filename, 4)
return get_negatives_positives(score_lines)
score_lines = four_column(filename)
return _split_scores(score_lines, 1)
def cmc_four_column(filename):
@@ -205,7 +204,7 @@ def five_column(filename):
opened with :py:func:`open_file` containing the scores.
Returns:
Yields:
str: The claimed identity -- the client name of the model that was used in
the comparison
@@ -221,18 +220,15 @@ def five_column(filename):
"""
for i, l in enumerate(open_file(filename)):
if isinstance(l, bytes): l = l.decode('utf-8')
s = l.strip()
if len(s) == 0 or s[0] == '#': continue #empty or comment
field = [k.strip() for k in s.split()]
if len(field) < 5:
raise SyntaxError('Line %d of file "%s" is invalid: %s' % (i, filename, l))
try:
score = float(field[4])
except:
raise SyntaxError('Cannot convert score to float at line %d of file "%s": %s' % (i, filename, l))
yield (field[0], field[1], field[2], field[3], score)
opened = open_file(filename, 'rb')
if sys.version_info.major > 2:
import io
opened = io.TextIOWrapper(opened, newline="")
reader = csv.reader(opened, delimiter=' ')
for splits in reader:
splits[-1] = float(splits[-1])
yield splits
def split_five_column(filename):
@@ -264,8 +260,8 @@ def split_five_column(filename):
"""
score_lines = load_score(filename, 5)
return get_negatives_positives(score_lines)
score_lines = four_column(filename)
return _split_scores(score_lines, 2)
def cmc_five_column(filename):
@@ -425,6 +421,17 @@ def dump_score(filename, score_lines):
numpy.savetxt(filename, score_lines, fmt=fmt)
def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1):
"""Take the output of :py:func:`four_column` or :py:func:`five_column` and return negatives and positives.
"""
positives, negatives = [], []
for line in score_lines:
which = positives if line[claimed_id_index] == line[real_id_index] else negatives
which.append(line[score_index])
return (numpy.array(negatives), numpy.array(positives))
def _convert_cmc_scores(neg_dict, pos_dict):
"""Converts the negative and positive scores read with
:py:func:`cmc_four_column` or :py:func:`cmc_five_column` into a format that
Loading