diff --git a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py index 9998d4a58ab180b045ad8c81b49cf4c343e527b1..a6e4f0f53c1f2eea059823c266e3d0f2551cfdfe 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py @@ -34,7 +34,7 @@ class FourColumnsScoreWriter(ScoreWriter): ) for biometric_reference in probe ] - filename = os.path.join(path, probe.subject) + ".txt" + filename = os.path.join(path, str(probe.subject)) + ".txt" open(filename, "w").writelines(lines) checkpointed_scores.append( SampleSet( @@ -69,8 +69,18 @@ class FourColumnsScoreWriter(ScoreWriter): class CSVScoreWriter(ScoreWriter): """ Read and write scores in CSV format, shipping all metadata with the scores + + Parameters + ---------- + + n_sample_sets: + Number of samplesets in one chunk + """ + def __init__(self, n_sample_sets=1000): + self.n_sample_sets = n_sample_sets + def write(self, probe_sampleset, path): """ Write scores and returns a :any:`bob.pipelines.DelayedSample` containing @@ -108,7 +118,7 @@ class CSVScoreWriter(ScoreWriter): header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0]) for probe in probe_sampleset: - filename = os.path.join(path, probe.subject) + ".csv" + filename = os.path.join(path, str(probe.subject)) + ".csv" with open(filename, "w") as f: csv_write = csv.writer(f) @@ -150,14 +160,23 @@ class CSVScoreWriter(ScoreWriter): """ Given a list of samplsets, write them all in a single file """ - os.makedirs(os.path.dirname(filename), exist_ok=True) - f = open(filename, "w") - first = True - for samplesets in samplesets_list: + + # CSV files tend to be very big, so instead of a single file + # a new chunk_<i>.csv is opened every n_sample_sets entries of samplesets_list + + base_dir = os.path.splitext(filename)[0] + os.makedirs(base_dir, exist_ok=True) + f = None + for i, samplesets in enumerate(samplesets_list): + if i% self.n_sample_sets==0: + if f is not None: + f.close() + del f + + 
+ filename = os.path.join(base_dir, f"chunk_{i}.csv") + f = open(filename, "w") + for sset in samplesets: for s in sset: - if first: - f.writelines(s.data) - first = False - else: - f.writelines(s.data[1:]) + f.writelines(s.data) + samplesets_list[i] = None \ No newline at end of file diff --git a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py index 2c2688cc5b094e9d091e84ce5320575aed5819cb..973039625200b4af245d1b8352637ea1d22fafcb 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py @@ -122,6 +122,9 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm): class BioAlgorithmDaskWrapper(BioAlgorithm): def __init__(self, biometric_algorithm, **kwargs): self.biometric_algorithm = biometric_algorithm + # Expose the score_writer of the wrapped algorithm on this wrapper, when present + if hasattr(biometric_algorithm, "score_writer"): + self.score_writer = biometric_algorithm.score_writer def enroll_samples(self, biometric_reference_features):