Skip to content
Snippets Groups Projects
Commit 493da265 authored by Tiago de Freitas Pereira
Browse files

Memory optimizing CSVWriter

parent f178ce66
No related branches found
No related tags found
2 merge requests: !185 Wrappers and aggregators, !180 [dask] Preparing bob.bio.base for dask pipelines
Pipeline #39635 failed
......@@ -34,7 +34,7 @@ class FourColumnsScoreWriter(ScoreWriter):
)
for biometric_reference in probe
]
filename = os.path.join(path, probe.subject) + ".txt"
filename = os.path.join(path, str(probe.subject)) + ".txt"
open(filename, "w").writelines(lines)
checkpointed_scores.append(
SampleSet(
......@@ -69,8 +69,18 @@ class FourColumnsScoreWriter(ScoreWriter):
class CSVScoreWriter(ScoreWriter):
"""
Read and write scores in CSV format, shipping all metadata with the scores
Parameters
----------
n_sample_sets:
Number of samplesets in one chunk
"""
def __init__(self, n_sample_sets=1000):
self.n_sample_sets = n_sample_sets
def write(self, probe_sampleset, path):
"""
Write scores and return a :any:`bob.pipelines.DelayedSample` containing
......@@ -108,7 +118,7 @@ class CSVScoreWriter(ScoreWriter):
header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0])
for probe in probe_sampleset:
filename = os.path.join(path, probe.subject) + ".csv"
filename = os.path.join(path, str(probe.subject)) + ".csv"
with open(filename, "w") as f:
csv_write = csv.writer(f)
......@@ -150,14 +160,23 @@ class CSVScoreWriter(ScoreWriter):
"""
Given a list of samplesets, write them all in a single file
"""
os.makedirs(os.path.dirname(filename), exist_ok=True)
f = open(filename, "w")
first = True
for samplesets in samplesets_list:
# CSV files tend to be very big,
# so here we write them in chunks
base_dir = os.path.splitext(filename)[0]
os.makedirs(base_dir, exist_ok=True)
f = None
for i, samplesets in enumerate(samplesets_list):
if i% self.n_sample_sets==0:
if f is not None:
f.close()
del f
filename = os.path.join(base_dir, f"chunk_{i}.csv")
f = open(filename, "w")
for sset in samplesets:
for s in sset:
if first:
f.writelines(s.data)
first = False
else:
f.writelines(s.data[1:])
f.writelines(s.data)
samplesets_list[i] = None
\ No newline at end of file
......@@ -122,6 +122,9 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm):
class BioAlgorithmDaskWrapper(BioAlgorithm):
def __init__(self, biometric_algorithm, **kwargs):
self.biometric_algorithm = biometric_algorithm
# Copy the score_writer attribute from the wrapped algorithm, if it has one
if hasattr(biometric_algorithm, "score_writer"):
self.score_writer = biometric_algorithm.score_writer
def enroll_samples(self, biometric_reference_features):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment