Skip to content
Snippets Groups Projects
Commit 493da265 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Memory optimizing CSVWriter

parent f178ce66
No related branches found
No related tags found
2 merge requests!185Wrappers and aggregators,!180[dask] Preparing bob.bio.base for dask pipelines
Pipeline #39635 failed
...@@ -34,7 +34,7 @@ class FourColumnsScoreWriter(ScoreWriter): ...@@ -34,7 +34,7 @@ class FourColumnsScoreWriter(ScoreWriter):
) )
for biometric_reference in probe for biometric_reference in probe
] ]
filename = os.path.join(path, probe.subject) + ".txt" filename = os.path.join(path, str(probe.subject)) + ".txt"
open(filename, "w").writelines(lines) open(filename, "w").writelines(lines)
checkpointed_scores.append( checkpointed_scores.append(
SampleSet( SampleSet(
...@@ -69,8 +69,18 @@ class FourColumnsScoreWriter(ScoreWriter): ...@@ -69,8 +69,18 @@ class FourColumnsScoreWriter(ScoreWriter):
class CSVScoreWriter(ScoreWriter): class CSVScoreWriter(ScoreWriter):
""" """
Read and write scores in CSV format, shipping all metadata with the scores Read and write scores in CSV format, shipping all metadata with the scores
Parameters
----------
n_sample_sets:
Number of samplesets in one chunk
""" """
def __init__(self, n_sample_sets=1000):
    """Create a CSV score writer that writes scores in chunks.

    Parameters
    ----------
    n_sample_sets: int
        Number of samplesets buffered into one chunk file — larger values
        mean fewer, bigger CSV chunks (memory vs. file-count trade-off).
    """
    self.n_sample_sets = n_sample_sets
def write(self, probe_sampleset, path): def write(self, probe_sampleset, path):
""" """
Write scores and returns a :any:`bob.pipelines.DelayedSample` containing Write scores and returns a :any:`bob.pipelines.DelayedSample` containing
...@@ -108,7 +118,7 @@ class CSVScoreWriter(ScoreWriter): ...@@ -108,7 +118,7 @@ class CSVScoreWriter(ScoreWriter):
header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0]) header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0])
for probe in probe_sampleset: for probe in probe_sampleset:
filename = os.path.join(path, probe.subject) + ".csv" filename = os.path.join(path, str(probe.subject)) + ".csv"
with open(filename, "w") as f: with open(filename, "w") as f:
csv_write = csv.writer(f) csv_write = csv.writer(f)
...@@ -150,14 +160,23 @@ class CSVScoreWriter(ScoreWriter): ...@@ -150,14 +160,23 @@ class CSVScoreWriter(ScoreWriter):
""" """
Given a list of samplesets, write them all in a single file Given a list of samplesets, write them all in a single file
""" """
os.makedirs(os.path.dirname(filename), exist_ok=True)
# CSV files tend to be very big
# here we write them in chunks
base_dir = os.path.splitext(filename)[0]
os.makedirs(base_dir, exist_ok=True)
f = None
for i, samplesets in enumerate(samplesets_list):
if i% self.n_sample_sets==0:
if f is not None:
f.close()
del f
filename = os.path.join(base_dir, f"chunk_{i}.csv")
f = open(filename, "w") f = open(filename, "w")
first = True
for samplesets in samplesets_list:
for sset in samplesets: for sset in samplesets:
for s in sset: for s in sset:
if first:
f.writelines(s.data) f.writelines(s.data)
first = False samplesets_list[i] = None
else: \ No newline at end of file
f.writelines(s.data[1:])
...@@ -122,6 +122,9 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm): ...@@ -122,6 +122,9 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm):
class BioAlgorithmDaskWrapper(BioAlgorithm): class BioAlgorithmDaskWrapper(BioAlgorithm):
def __init__(self, biometric_algorithm, **kwargs): def __init__(self, biometric_algorithm, **kwargs):
self.biometric_algorithm = biometric_algorithm self.biometric_algorithm = biometric_algorithm
# Copying attribute
if hasattr(biometric_algorithm, "score_writer"):
self.score_writer = biometric_algorithm.score_writer
def enroll_samples(self, biometric_reference_features): def enroll_samples(self, biometric_reference_features):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment