diff --git a/bob/bio/base/config/examples/gabor_mobio-male.py b/bob/bio/base/config/examples/gabor_mobio-male.py deleted file mode 100644 index 8336c510b5cbd927647abd607fb6339f99bcbcc7..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/gabor_mobio-male.py +++ /dev/null @@ -1,82 +0,0 @@ -from bob.bio.base.pipelines.vanilla_biometrics.implemented import CheckpointDistance -from bob.bio.base.pipelines.vanilla_biometrics.legacy import ( - DatabaseConnector, - Preprocessor, - Extractor, - AlgorithmAsBioAlg, -) -from bob.bio.face.database.mobio import MobioBioDatabase -from bob.bio.face.preprocessor import FaceCrop -from bob.extension import rc -from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA -from sklearn.pipeline import make_pipeline -import functools -import os -import bob.bio.face -import math - -base_dir = "example" - - -database = DatabaseConnector( - MobioBioDatabase( - original_directory=rc["bob.db.mobio.directory"], - annotation_directory=rc["bob.db.mobio.annotation_directory"], - original_extension=".png", - protocol="mobile0-male", - ) -) -database.allow_score_multiple_references = True - -# Using face crop -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - -# FaceCrop -preprocessor = functools.partial( - bob.bio.face.preprocessor.INormLBP, - face_cropper=functools.partial( - bob.bio.face.preprocessor.FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - color_channel="gray", - ), -) - -extractor = functools.partial( - bob.bio.face.extractor.GridGraph, - # Gabor parameters - gabor_sigma=math.sqrt(2.0) * math.pi, - # what kind of information to extract - normalize_gabor_jets=True, - # setup of the fixed grid - node_distance=(8, 8), -) - -transformer = make_pipeline( - Preprocessor(preprocessor, features_dir=os.path.join(base_dir, "face_cropper")), - Extractor(extractor, features_dir=os.path.join(base_dir, "gabor_graph")), -) - - -## algorithm - -gabor_jet = functools.partial( - bob.bio.face.algorithm.GaborJet, - gabor_jet_similarity_type="PhaseDiffPlusCanberra", - multiple_feature_scoring="max_jet", - gabor_sigma=math.sqrt(2.0) * math.pi, -) - -algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir, allow_score_multiple_references=True) -#algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir) - - -from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics - -#pipeline = VanillaBiometrics(transformer, algorithm) -pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48) - diff --git a/bob/bio/base/config/examples/isv_atnt_legacy_all_legacy.py b/bob/bio/base/config/examples/isv_atnt_legacy_all_legacy.py deleted file mode 100644 index 41ee5f2c30a74561cca1b3f3dec90345ffc07bfd..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/isv_atnt_legacy_all_legacy.py +++ /dev/null @@ -1,75 +0,0 @@ -from bob.bio.face.database import AtntBioDatabase -from bob.bio.gmm.algorithm import ISV -from bob.bio.face.preprocessor import FaceCrop -from sklearn.pipeline import make_pipeline -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, Preprocessor, AlgorithmAsTransformer, 
AlgorithmAsBioAlg, Extractor -import functools -from bob.bio.base.pipelines.vanilla_biometrics.implemented import ( - Distance, - CheckpointDistance, -) -import os - -# DATABASE -database = DatabaseConnector( - AtntBioDatabase(original_directory="./atnt", protocol="Default"), -) -database.allow_scoring_with_all_biometric_references = True - -base_dir = "example/isv" - -# PREPROCESSOR LEGACY - -# Cropping -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - - -# RANDOM EYES POSITIONS -# I JUST MADE UP THESE NUMBERS -FIXED_RIGHT_EYE_POS = (30, 30) -FIXED_LEFT_EYE_POS = (20, 50) - -face_cropper = functools.partial( - FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, -) - - -import bob.bio.face - -extractor = functools.partial( - bob.bio.face.extractor.DCTBlocks, - block_size=12, - block_overlap=11, - number_of_dct_coefficients=45, -) - - - -# ALGORITHM LEGACY -isv = functools.partial(ISV, subspace_dimension_of_u=10, number_of_gaussians=2) - -model_path=os.path.join(base_dir, "ubm_u.hdf5") -transformer = make_pipeline( - Preprocessor(callable=face_cropper, features_dir=os.path.join(base_dir,"face_crop")), - Extractor(extractor, features_dir=os.path.join(base_dir, "dcts")), - AlgorithmAsTransformer( - callable=isv, features_dir=os.path.join(base_dir,"isv"), model_path=model_path - ), -) - - -algorithm = AlgorithmAsBioAlg(callable=isv, features_dir=base_dir, model_path=model_path) - - -from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics - -#pipeline = VanillaBiometrics(transformer, algorithm) -pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/lda_atnt_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy.py deleted file mode 100644 index dd6f93edc46fb041870d2dea18ab66f02fd5257a..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/lda_atnt_legacy.py +++ /dev/null @@ -1,65 +0,0 @@ -from bob.bio.face.database import AtntBioDatabase -from bob.bio.base.algorithm import LDA -from bob.bio.face.preprocessor import FaceCrop -from sklearn.pipeline import make_pipeline -from bob.pipelines.transformers import CheckpointSampleLinearize -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, Preprocessor, AlgorithmAsTransformer -import functools -from bob.bio.base.pipelines.vanilla_biometrics.implemented import ( - Distance, - CheckpointDistance, -) - -# DATABASE - -database = DatabaseConnector( - AtntBioDatabase(original_directory="./atnt", protocol="Default"), -) - - -# PREPROCESSOR LEGACY - -# Cropping -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - - -# RANDOM EYES POSITIONS -# I JUST MADE UP THESE NUMBERS -FIXED_RIGHT_EYE_POS = (30, 30) -FIXED_LEFT_EYE_POS = (20, 50) - -face_cropper = functools.partial( - FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, 
- fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, -) - -# ALGORITHM LEGACY - -lda = functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90) - - -transformer = make_pipeline( - Preprocessor(callable=face_cropper, features_dir="./example/transformer0"), - CheckpointSampleLinearize(features_dir="./example/transformer1"), - AlgorithmAsTransformer( - callable=lda, features_dir="./example/transformer2", model_path="./example/lda_projector.hdf5" - ), -) - - - -algorithm = CheckpointDistance(features_dir="./example/") -# algorithm = Distance() - - -from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics - -#pipeline = VanillaBiometrics(transformer, algorithm) -pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py deleted file mode 100644 index d9736430ea82c0623137ee53ae3bf6919e40242e..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py +++ /dev/null @@ -1,64 +0,0 @@ -from bob.bio.face.database import AtntBioDatabase -from bob.bio.base.algorithm import LDA -from bob.bio.face.preprocessor import FaceCrop -from sklearn.pipeline import make_pipeline -from bob.pipelines.transformers import CheckpointSampleLinearize -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, Preprocessor, AlgorithmAsTransformer, AlgorithmAsBioAlg -import functools -from bob.bio.base.pipelines.vanilla_biometrics.implemented import ( - Distance, - CheckpointDistance, -) -import os - -# DATABASE -database = DatabaseConnector( - AtntBioDatabase(original_directory="./atnt", protocol="Default"), -) - -base_dir = "example" - -# PREPROCESSOR LEGACY - -# Cropping -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - - -# RANDOM EYES POSITIONS -# I JUST MADE UP THESE NUMBERS -FIXED_RIGHT_EYE_POS = (30, 30) -FIXED_LEFT_EYE_POS = (20, 50) - -face_cropper = functools.partial( - FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, -) - -# ALGORITHM LEGACY - -lda = functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90) - - -transformer = make_pipeline( - Preprocessor(callable=face_cropper, features_dir=os.path.join(base_dir,"transformer0")), - CheckpointSampleLinearize(features_dir=os.path.join(base_dir,"transformer1")), - AlgorithmAsTransformer( - callable=lda, features_dir=os.path.join(base_dir,"transformer2"), model_path=os.path.join(base_dir, "lda.hdf5") - ), -) - - -algorithm = AlgorithmAsBioAlg(callable=lda, features_dir="./example/") - - -from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics - -#pipeline = VanillaBiometrics(transformer, algorithm) -pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/pca_atnt.py b/bob/bio/base/config/examples/pca_atnt.py deleted file mode 100644 index e75daf2752badb3930f34a10c62943455fa38970..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/pca_atnt.py +++ /dev/null @@ 
-1,33 +0,0 @@ -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -from sklearn.pipeline import make_pipeline -from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA -from bob.bio.base.pipelines.vanilla_biometrics.implemented import ( - CheckpointDistance, -) -from bob.bio.face.database import AtntBioDatabase -import os - - -base_dir = "example" - -database = DatabaseConnector(AtntBioDatabase(original_directory="./atnt", protocol="Default")) -database.allow_scoring_with_all_biometric_references = True - -transformer = make_pipeline( - CheckpointSampleLinearize(features_dir=os.path.join(base_dir, "linearize")), - CheckpointSamplePCA( - features_dir=os.path.join(base_dir, "pca_features"), model_path=os.path.join(base_dir, "pca.pkl") - ), -) -algorithm = CheckpointDistance(features_dir=base_dir, allow_score_multiple_references=True) - -# # comment out the code below to disable dask -from bob.pipelines.mixins import estimator_dask_it, mix_me_up -from bob.bio.base.pipelines.vanilla_biometrics.mixins import ( - BioAlgDaskMixin, -) - -from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics - -pipeline = VanillaBiometrics(transformer, algorithm) -#pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/pca_mobio-male.py b/bob/bio/base/config/examples/pca_mobio-male.py deleted file mode 100644 index 668a1e6f7cbdc34dbae1f1ac5bcf40299db07bcf..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/pca_mobio-male.py +++ /dev/null @@ -1,50 +0,0 @@ -from bob.bio.base.pipelines.vanilla_biometrics.implemented import ( - CheckpointDistance, -) -from bob.bio.base.pipelines.vanilla_biometrics.legacy import ( - DatabaseConnector, - Preprocessor, -) -from bob.bio.face.database.mobio import MobioBioDatabase -from bob.bio.face.preprocessor import FaceCrop -from bob.extension import rc -from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA -from sklearn.pipeline import make_pipeline -import functools - - -database = DatabaseConnector( - MobioBioDatabase( - original_directory=rc["bob.db.mobio.directory"], - annotation_directory=rc["bob.db.mobio.annotation_directory"], - original_extension=".png", - protocol="mobile0-male", - ) -) - -# Using face crop -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) -# FaceCrop -preprocessor = functools.partial( - FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, -) - -transformer = make_pipeline( - Preprocessor(preprocessor, features_dir="./example/extractor0"), - CheckpointSampleLinearize(features_dir="./example/extractor1"), - CheckpointSamplePCA( - features_dir="./example/extractor2", model_path="./example/pca.pkl" - ), -) -algorithm = CheckpointDistance(features_dir="./example/") - -from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics - -#pipeline = VanillaBiometrics(transformer, algorithm) -pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py index 
d67835735a076de0842a1f9467eaf54eb53a4fb7..5f6a882001b489ec6708465963389bf0d2e32380 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py @@ -1,6 +1,6 @@ -# see https://docs.python.org/3/library/pkgutil.html -from pkgutil import extend_path +from .pipelines import VanillaBiometricsPipeline +from .biometric_algorithms import Distance +from .score_writers import FourColumnsScoreWriter, CSVScoreWriter +from .wrappers import BioAlgorithmCheckpointWrapper, BioAlgorithmDaskWrapper, dask_vanilla_biometrics -from .pipeline import VanillaBiometrics, dask_vanilla_biometrics - -__path__ = extend_path(__path__, __name__) +from .legacy import BioAlgorithmLegacy, DatabaseConnector \ No newline at end of file diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py index e27c27aabcd9d4f40cf87feeace3a497b094bab9..7e99ec91d6617db8390e5e7e3b5e5b1e364c2bc5 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py @@ -1,3 +1,7 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + + from abc import ABCMeta, abstractmethod from bob.pipelines.sample import Sample, SampleSet, DelayedSample import functools @@ -9,14 +13,9 @@ class BioAlgorithm(metaclass=ABCMeta): biometric model enrollement, via ``enroll()`` and scoring, with ``score()``. - Parameters - ---------- - - """ - def __init__(self, allow_score_multiple_references=False, **kwargs): - self.allow_score_multiple_references = allow_score_multiple_references + def __init__(self, **kwargs): self.stacked_biometric_references = None def enroll_samples(self, biometric_references): @@ -61,7 +60,12 @@ class BioAlgorithm(metaclass=ABCMeta): """ pass - def score_samples(self, probe_features, biometric_references, allow_scoring_with_all_biometric_references=False): + def score_samples( + self, + probe_features, + biometric_references, + allow_scoring_with_all_biometric_references=False, + ): """Scores a new sample against multiple (potential) references Parameters @@ -78,7 +82,7 @@ class BioAlgorithm(metaclass=ABCMeta): allow_scoring_with_all_biometric_references: bool If true will call `self.score_multiple_biometric_references`, at scoring time, to compute scores in one shot with multiple probes. - This optiization is useful when all probes needs to be compared with all biometric references AND + This optimization is useful when all probes needs to be compared with all biometric references AND your scoring function allows this broadcast computation. @@ -87,18 +91,29 @@ class BioAlgorithm(metaclass=ABCMeta): scores : list For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the + samples in the probe, together with the probes and the relevant reference's subject identifiers. 
""" retval = [] for p in probe_features: - retval.append(self._score_sample_set(p, biometric_references, allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references)) + retval.append( + self._score_sample_set( + p, + biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + ) return retval - def _score_sample_set(self, sampleset, biometric_references, allow_scoring_with_all_biometric_references): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores + def _score_sample_set( + self, + sampleset, + biometric_references, + allow_scoring_with_all_biometric_references, + ): + """Given a sampleset for probing, compute the scores and returns a sample set with the scores """ # Stacking the samples from a sampleset @@ -106,15 +121,10 @@ class BioAlgorithm(metaclass=ABCMeta): # Compute scores for each sample inside of the sample set # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from + # We should add an aggregator function here so we can properly aggregator samples from # a sampleset either after or before scoring. - # To be honest, this should be the default behaviour + # To be honest, this should be the default behavior retval = [] - - def _write_sample(ref, probe, score): - data = make_four_colums_score(ref.subject, probe.subject, probe.path, score) - return Sample(data, parent=ref) - for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)): # Creating one sample per comparison subprobe_scores = [] @@ -128,21 +138,25 @@ class BioAlgorithm(metaclass=ABCMeta): scores = self.score_multiple_biometric_references( self.stacked_biometric_references, s ) - + # Wrapping the scores in samples for ref, score in zip(biometric_references, scores): - subprobe_scores.append(_write_sample(ref, sampleset, score)) + subprobe_scores.append(Sample(score, parent=ref)) else: for ref in [ r for r in biometric_references if r.key in sampleset.references ]: score = self.score(ref.data, s) - subprobe_scores.append(_write_sample(ref, sampleset, score)) - - # Creating one sampleset per probe - subprobe = SampleSet(subprobe_scores, parent=sampleset) - subprobe.subprobe_id = subprobe_id + subprobe_scores.append(Sample(score, parent=ref)) + + # Fetching metadata from the probe + kwargs = dict( + (metadata, sampleset.__dict__[metadata]) + for metadata in sampleset.__dict__.keys() + if metadata not in ["samples", "key", "data", "load", "_data"] + ) + subprobe = SampleSet(subprobe_scores, parent=parent, **kwargs) retval.append(subprobe) return retval @@ -245,26 +259,23 @@ class Database(metaclass=ABCMeta): pass -def make_four_colums_score( - biometric_reference_subject, probe_subject, probe_path, score, -): - data = "{0} {1} {2} {3}\n".format( - biometric_reference_subject, probe_subject, probe_path, score, - ) - return data - - -def create_score_delayed_sample(path, probe): +class ScoreWriter(metaclass=ABCMeta): """ - Write scores in the four columns format + Defines base methods to read, write scores and concatenate scores + for :any:`BioAlgorithm` """ - with open(path, "w") as f: - for score_line in probe.samples: - f.write(score_line.data) + def __init__(self, extension=".txt"): + self.extension = extension - def load(): - with open(path) as f: - return f.read() + @abstractmethod + def write(self, sampleset, path): + pass - return DelayedSample(load, parent=probe) + 
@abstractmethod + def read(self, path): + pass + + @abstractmethod + def concatenate_write_scores(self, sampleset, path): + pass diff --git a/bob/bio/base/pipelines/vanilla_biometrics/implemented.py b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithms.py similarity index 94% rename from bob/bio/base/pipelines/vanilla_biometrics/implemented.py rename to bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithms.py index f77e66d3386f35a44674d7944b51dbf92053ae2b..233c9863d7b07c98216f068922cfc45be3acf58a 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/implemented.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithms.py @@ -2,8 +2,10 @@ import scipy.spatial.distance from sklearn.utils.validation import check_array import numpy from .abstract_classes import BioAlgorithm -from .mixins import BioAlgCheckpointMixin from scipy.spatial.distance import cdist +import os +from bob.pipelines import DelayedSample, Sample, SampleSet +import functools class Distance(BioAlgorithm): @@ -69,7 +71,3 @@ class Distance(BioAlgorithm): ) return list(scores.flatten()) - - -class CheckpointDistance(BioAlgCheckpointMixin, Distance): - pass diff --git a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py index bcf9840bd6e7c8dc7b570cb48756526730b89236..f1279b05e34030e2b3b5a300639fdfc833c843cf 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py @@ -11,17 +11,16 @@ from bob.bio.base import utils from .abstract_classes import ( BioAlgorithm, Database, - create_score_delayed_sample, - make_four_colums_score, ) from bob.io.base import HDF5File -from bob.pipelines.mixins import SampleMixin, CheckpointMixin -from bob.pipelines.sample import DelayedSample, SampleSet, Sample -from bob.pipelines.utils import is_picklable -from sklearn.base import TransformerMixin, BaseEstimator +from bob.pipelines import DelayedSample, SampleSet, Sample import logging import copy +from .score_writers import FourColumnsScoreWriter + +from bob.bio.base.algorithm import Algorithm + logger = logging.getLogger("bob.bio.base") @@ -157,276 +156,106 @@ class DatabaseConnector(Database): return list(probes.values()) -class _NonPickableWrapper: - def __init__(self, callable, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self._instance = None - - @property - def instance(self): - if self._instance is None: - self._instance = self.callable() - return self._instance +class BioAlgorithmLegacy(BioAlgorithm): + """Biometric Algorithm that handlesy :any:`bob.bio.base.algorithm.Algorithm` - def __setstate__(self, d): - # Handling unpicklable objects - self._instance = None - # return super().__setstate__(d) + In this design, :any:`BioAlgorithm.enroll` maps to :any:`bob.bio.base.algorithm.Algorithm.enroll` and + :any:`BioAlgorithm.score` maps :any:`bob.bio.base.algorithm.Algorithm.score` + + .. 
note:: + Legacy algorithms are always checkpointable - def __getstate__(self): - # Handling unpicklable objects - self._instance = None - # return super().__getstate__() - -class _Preprocessor(_NonPickableWrapper, TransformerMixin, BaseEstimator): - def transform(self, X, annotations=None): - if annotations is None: - return [self.instance(data) for data in X] - else: - return [self.instance(data, annot) for data, annot in zip(X, annotations)] - - def _more_tags(self): - return {"stateless": True, "requires_fit": False} + Parameters + ---------- + instance: object + An instance of :any:`bob.bio.base.algorithm.Algorithm` -def _get_pickable_method(method): - if not is_picklable(method): - logger.warning( - f"The method {method} is not picklable. Returning its unbounded method" - ) - method = functools.partial(method.__func__, None) - return method + Example + ------- + >>> from bob.bio.base.pipelines.vanilla_biometrics import BioAlgorithmLegacy + >>> from bob.bio.base.algorithm import PCA + >>> biometric_algorithm = BioAlgorithmLegacy(PCA()) + """ -class Preprocessor(CheckpointMixin, SampleMixin, _Preprocessor): def __init__( self, - callable, - transform_extra_arguments=(("annotations", "annotations"),), + instance, + base_dir, + force=False, + projector_file=None, + score_writer=FourColumnsScoreWriter(), **kwargs, ): - instance = callable() - super().__init__( - callable=callable, - transform_extra_arguments=transform_extra_arguments, - load_func=_get_pickable_method(instance.read_data), - save_func=_get_pickable_method(instance.write_data), - **kwargs, - ) - - -def _split_X_by_y(X, y): - training_data = defaultdict(list) - for x1, y1 in zip(X, y): - training_data[y1].append(x1) - training_data = training_data.values() - return training_data - - -class _Extractor(_NonPickableWrapper, TransformerMixin, BaseEstimator): - def transform(self, X, metadata=None): - if self.requires_metadata: - return [self.instance(data, metadata=m) for data, m in zip(X, metadata)] - else: - return [self.instance(data) for data in X] - - def fit(self, X, y=None): - if not self.instance.requires_training: - return self - - training_data = X - if self.instance.split_training_data_by_client: - training_data = _split_X_by_y(X, y) - - self.instance.train(self, training_data, self.model_path) - return self - - def _more_tags(self): - return { - "requires_fit": self.instance.requires_training, - "stateless": not self.instance.requires_training, - } - - -class Extractor(CheckpointMixin, SampleMixin, _Extractor): - def __init__(self, callable, model_path=None, **kwargs): - instance = callable() - - transform_extra_arguments = None - self.requires_metadata = False - if utils.is_argument_available("metadata", instance.__call__): - transform_extra_arguments = (("metadata", "metadata"),) - self.requires_metadata = True - - fit_extra_arguments = None - if instance.requires_training and instance.split_training_data_by_client: - fit_extra_arguments = (("y", "subject"),) - - super().__init__( - callable=callable, - transform_extra_arguments=transform_extra_arguments, - fit_extra_arguments=fit_extra_arguments, - load_func=_get_pickable_method(instance.read_feature), - save_func=_get_pickable_method(instance.write_feature), - model_path=model_path, - **kwargs, - ) - - def load_model(self): - self.instance.load(self.model_path) - return self - - def save_model(self): - # we have already saved the model in .fit() - return self - - -class _AlgorithmTransformer(_NonPickableWrapper, TransformerMixin, BaseEstimator): - def 
transform(self, X): - return [self.instance.project(feature) for feature in X] - - def fit(self, X, y=None): - if not self.instance.requires_projector_training: - return self - - training_data = X - if self.instance.split_training_features_by_client: - training_data = _split_X_by_y(X, y) - - self.instance.train_projector(training_data, self.model_path) - return self - - def _more_tags(self): - return {"requires_fit": self.instance.requires_projector_training} - + super().__init__(**kwargs) -class AlgorithmAsTransformer(CheckpointMixin, SampleMixin, _AlgorithmTransformer): - """Class that wraps :py:class:`bob.bio.base.algorithm.Algoritm` + if not isinstance(instance, Algorithm): + raise ValueError( + f"Only `bob.bio.base.Algorithm` supported, not `{instance}`" + ) + logger.info(f"Using `bob.bio.base` legacy algorithm {instance}") - :py:method:`LegacyAlgorithmrMixin.fit` maps to :py:method:`bob.bio.base.algorithm.Algoritm.train_projector` + if instance.requires_projector_training and projector_file is None: + raise ValueError(f"{instance} requires a `projector_file` to be set") - :py:method:`LegacyAlgorithmrMixin.transform` maps :py:method:`bob.bio.base.algorithm.Algoritm.project` + self.instance = instance + self.is_background_model_loaded = False - Example - ------- + self.projector_file = projector_file + self.biometric_reference_dir = os.path.join(base_dir, "biometric_references") + self._biometric_reference_extension = ".hdf5" + self.score_dir = os.path.join(base_dir, "scores") + self.score_writer = score_writer + self.force = force - Wrapping LDA algorithm with functtools - >>> from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyAlgorithmAsTransformer - >>> from bob.bio.base.algorithm import LDA - >>> import functools - >>> transformer = LegacyAlgorithmAsTransformer(functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90)) + def load_legacy_background_model(self): + # Loading background model + if not self.is_background_model_loaded: + self.instance.load_projector(self.projector_file) + self.is_background_model_loaded = True + def enroll(self, enroll_features, **kwargs): + self.load_legacy_background_model() + return self.instance.enroll(enroll_features) + def score(self, biometric_reference, data, **kwargs): + self.load_legacy_background_model() + scores = self.instance.score(biometric_reference, data) + if isinstance(scores, list): + scores = self.instance.probe_fusion_function(scores) + return scores - Parameters - ---------- - callable: callable - Calleble function that instantiates the bob.bio.base.algorithm.Algorithm + def score_multiple_biometric_references(self, biometric_references, data, **kwargs): + scores = self.instance.score_for_multiple_models(biometric_references, data) + return scores - """ + def write_biometric_reference(self, sample, path): + os.makedirs(os.path.dirname(path), exist_ok=True) + self.instance.write_model(sample.data, path) - def __init__(self, callable, model_path, **kwargs): - instance = callable() - - fit_extra_arguments = None - if ( - instance.requires_projector_training - and instance.split_training_features_by_client - ): - fit_extra_arguments = (("y", "subject"),) - - super().__init__( - callable=callable, - fit_extra_arguments=fit_extra_arguments, - load_func=_get_pickable_method(instance.read_feature), - save_func=_get_pickable_method(instance.write_feature), - model_path=model_path, - **kwargs, + def _enroll_sample_set(self, sampleset): + """ + Enroll a sample set with checkpointing + """ + # Amending `models` 
directory + path = os.path.join( + self.biometric_reference_dir, + str(sampleset.key) + self._biometric_reference_extension, ) - def load_model(self): - self.instance.load_projector(self.model_path) - return self - - def save_model(self): - # we have already saved the model in .fit() - return self - - -class AlgorithmAsBioAlg(_NonPickableWrapper, BioAlgorithm): - """Biometric Algorithm that handles legacy :py:class:`bob.bio.base.algorithm.Algorithm` - - - :py:method:`BioAlgorithm.enroll` maps to :py:method:`bob.bio.base.algorithm.Algoritm.enroll` - - :py:method:`BioAlgorithm.score` maps :py:method:`bob.bio.base.algorithm.Algoritm.score` - - - Example - ------- + if self.force or not os.path.exists(path): + enrolled_sample = super()._enroll_sample_set(sampleset) + # saving the new sample + self.write_biometric_reference(enrolled_sample, path) - Parameters - ---------- - callable: callable - Calleble function that instantiates the bob.bio.base.algorithm.Algorithm - - """ - - def __init__( - self, callable, features_dir, extension=".hdf5", model_path=None, **kwargs - ): - super().__init__(callable, **kwargs) - self.features_dir = features_dir - self.biometric_reference_dir = os.path.join( - self.features_dir, "biometric_references" + delayed_enrolled_sample = DelayedSample( + functools.partial(self.instance.read_model, path), parent=sampleset ) - self.score_dir = os.path.join(self.features_dir, "scores") - self.extension = extension - self.model_path = model_path - self.is_projector_loaded = False - - def _enroll_sample_set(self, sampleset): - # Enroll - return self.enroll(sampleset) - - def _load_projector(self): - """ - Run :py:meth:`bob.bio.base.algorithm.Algorithm.load_projector` if necessary by - :py:class:`bob.bio.base.algorithm.Algorithm` - """ - if self.instance.performs_projection and not self.is_projector_loaded: - if self.model_path is None: - raise ValueError( - "Algorithm " + f"{self. instance} performs_projection. Hence, " - "`model_path` needs to passed in `AlgorithmAsBioAlg.__init__`" - ) - else: - # Loading model - self.instance.load_projector(self.model_path) - self.is_projector_loaded = True - def _restore_state_of_ref(self, ref): - """ - There are some algorithms that :py:meth:`bob.bio.base.algorithm.Algorithm.read_model` or - :py:meth:`bob.bio.base.algorithm.Algorithm.read_feature` depends - on the state of `self` to be properly loaded. - In these cases, it's not possible to rely only in the unbounded method extracted by - :py:func:`_get_pickable_method`. - - This function replaces the current state of these objects (that are not) - by bounding them with `self.instance` - """ - - if isinstance(ref, DelayedSample): - new_ref = copy.copy(ref) - - new_ref.load = functools.partial(ref.load.func, self.instance, ref.load.args[1]) - return new_ref - else: - return ref + return delayed_enrolled_sample def _score_sample_set( self, @@ -434,105 +263,14 @@ class AlgorithmAsBioAlg(_NonPickableWrapper, BioAlgorithm): biometric_references, allow_scoring_with_all_biometric_references=False, ): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores - """ - - # Compute scores for each sample inside of the sample set - # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from - # a sampleset either after or before scoring. 
- # To be honest, this should be the default behaviour - - def _write_sample(ref, probe, score): - data = make_four_colums_score(ref.subject, probe.subject, probe.path, score) - return Sample(data, parent=ref) - - self._load_projector() - retval = [] - - for subprobe_id, s in enumerate(sampleset.samples): - # Creating one sample per comparison - subprobe_scores = [] - - if allow_scoring_with_all_biometric_references: - if self.stacked_biometric_references is None: - - if self.instance.performs_projection: - # Hydrating the state of biometric references - biometric_references = [ - self._restore_state_of_ref(ref) - for ref in biometric_references - ] - - self.stacked_biometric_references = [ - ref.data for ref in biometric_references - ] - - s = self._restore_state_of_ref(s) - scores = self.score_multiple_biometric_references( - self.stacked_biometric_references, s.data - ) - # Wrapping the scores in samples - for ref, score in zip(biometric_references, scores): - subprobe_scores.append(_write_sample(ref, sampleset, score)) - - else: - for ref in [ - r for r in biometric_references if r.key in sampleset.references - ]: - ref = self._restore_state_of_ref(ref) - score = self.score(ref.data, s.data) - subprobe_scores.append(_write_sample(ref, sampleset, score)) - - # Creating one sampleset per probe - subprobe = SampleSet(subprobe_scores, parent=sampleset) - subprobe.subprobe_id = subprobe_id - - # Checkpointing score MANDATORY FOR LEGACY - path = os.path.join(self.score_dir, str(subprobe.path) + ".txt") - os.makedirs(os.path.dirname(path), exist_ok=True) - - delayed_scored_sample = create_score_delayed_sample(path, subprobe) - subprobe.samples = [delayed_scored_sample] - - retval.append(subprobe) - - return retval - - def enroll(self, enroll_features, **kwargs): - - if not isinstance(enroll_features, SampleSet): - raise ValueError( - f"`enroll_features` should be the type SampleSet, not {enroll_features}" - ) - - path = os.path.join( - self.biometric_reference_dir, str(enroll_features.key) + self.extension + path = os.path.join(self.score_dir, str(sampleset.key)) + # Computing score + scored_sample_set = super()._score_sample_set( + sampleset, + biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, ) - self._load_projector() - if path is None or not os.path.isfile(path): - # Enrolling - data = [s.data for s in enroll_features.samples] - model = self.instance.enroll(data) - # Checkpointing - os.makedirs(os.path.dirname(path), exist_ok=True) - self.instance.write_model(model, path) + scored_sample_set = self.score_writer.write(scored_sample_set, path) - reader = _get_pickable_method(self.instance.read_model) - return DelayedSample(functools.partial(reader, path), parent=enroll_features) - - def score(self, biometric_reference, data, **kwargs): - return self.instance.score(biometric_reference, data) - - def score_multiple_biometric_references(self, biometric_references, data, **kwargs): - """ - It handles the score computation of one probe against multiple biometric references using legacy - `bob.bio.base` - - Basically it wraps :py:meth:`bob.bio.base.algorithm.Algorithm.score_for_multiple_models`. 
- - """ - - scores = self.instance.score_for_multiple_models(biometric_references, data) - return scores + return scored_sample_set diff --git a/bob/bio/base/pipelines/vanilla_biometrics/mixins.py b/bob/bio/base/pipelines/vanilla_biometrics/mixins.py deleted file mode 100644 index d21bc9c8f1bed30560639db35a0cdac8b525413d..0000000000000000000000000000000000000000 --- a/bob/bio/base/pipelines/vanilla_biometrics/mixins.py +++ /dev/null @@ -1,122 +0,0 @@ -from bob.pipelines.mixins import CheckpointMixin -from bob.pipelines.sample import DelayedSample -import bob.io.base -import os -import functools -import dask -from .abstract_classes import create_score_delayed_sample - - -class BioAlgCheckpointMixin(CheckpointMixin): - """Mixing used to checkpoint Enrolled and Scoring samples. - - Examples - -------- - - >>> from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import BioAlgCheckpointMixin, Distance - >>> class DistanceCheckpoint(BioAlgCheckpointMixin, Distance) pass: - >>> biometric_algorithm = DistanceCheckpoint(features_dir="./") - >>> biometric_algorithm.enroll(sample) - - It's possible to use it as with the :py:func:`bob.pipelines.mixins.mix_me_up` - - >>> from bob.pipelines.mixins import mix_me_up - >>> biometric_algorithm = mix_me_up([BioAlgCheckpointMixin], Distance)(features_dir="./") - >>> biometric_algorithm.enroll(sample) - - """ - - def __init__(self, features_dir="", **kwargs): - super().__init__(features_dir=features_dir, **kwargs) - self.biometric_reference_dir = os.path.join( - features_dir, "biometric_references" - ) - self.score_dir = os.path.join(features_dir, "scores") - - def save(self, sample, path): - return bob.io.base.save(sample.data, path, create_directories=True) - - def _enroll_sample_set(self, sampleset): - """ - Enroll a sample set with checkpointing - """ - - # Amending `models` directory - path = os.path.join( - self.biometric_reference_dir, str(sampleset.key) + self.extension - ) - if path is None or not os.path.isfile(path): - - # Enrolling the sample - enrolled_sample = super()._enroll_sample_set(sampleset) - - # saving the new sample - self.save(enrolled_sample, path) - - # Dealaying it. 
- # This seems inefficient, but it's crucial for large datasets - delayed_enrolled_sample = DelayedSample( - functools.partial(bob.io.base.load, path), enrolled_sample - ) - - else: - # If sample already there, just load - delayed_enrolled_sample = self.load(sampleset, path) - - return delayed_enrolled_sample - - def _score_sample_set( - self, - sampleset, - biometric_references, - allow_scoring_with_all_biometric_references=False - ): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores - """ - - # Computing score - scored_sample_set = super()._score_sample_set( - sampleset, - biometric_references, - allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, - ) - for s in scored_sample_set: - # Checkpointing score - path = os.path.join(self.score_dir, str(s.path) + ".txt") - os.makedirs(os.path.dirname(path), exist_ok=True) - - delayed_scored_sample = create_score_delayed_sample(path, s) - s.samples = [delayed_scored_sample] - - return scored_sample_set - - -class BioAlgDaskMixin: - def enroll_samples(self, biometric_reference_features): - biometric_references = biometric_reference_features.map_partitions( - super().enroll_samples - ) - return biometric_references - - def score_samples( - self, - probe_features, - biometric_references, - allow_scoring_with_all_biometric_references=False, - ): - - # TODO: Here, we are sending all computed biometric references to all - # probes. It would be more efficient if only the models related to each - # probe are sent to the probing split. An option would be to use caching - # and allow the ``score`` function above to load the required data from - # the disk, directly. A second option would be to generate named delays - # for each model and then associate them here. 
- - all_references = dask.delayed(list)(biometric_references) - - scores = probe_features.map_partitions( - super().score_samples, - all_references, - allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, - ) - return scores diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py similarity index 85% rename from bob/bio/base/pipelines/vanilla_biometrics/pipeline.py rename to bob/bio/base/pipelines/vanilla_biometrics/pipelines.py index c5d0cbf487c708d2b8d6a8e5ef01cfecbc8fa897..434c5208cb168d0d02094438e8454f95d17674b6 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py @@ -14,7 +14,7 @@ import numpy logger = logging.getLogger(__name__) -class VanillaBiometrics(object): +class VanillaBiometricsPipeline(object): """ Vanilla Biometrics Pipeline @@ -120,7 +120,7 @@ class VanillaBiometrics(object): biometric_reference_features = self.transformer.transform( biometric_reference_samples ) - + biometric_references = self.biometric_algorithm.enroll_samples( biometric_reference_features ) @@ -137,7 +137,7 @@ class VanillaBiometrics(object): # probes is a list of SampleSets probe_features = self.transformer.transform(probe_samples) - + scores = self.biometric_algorithm.score_samples( probe_features, biometric_references, @@ -146,28 +146,3 @@ class VanillaBiometrics(object): # scores is a list of Samples return scores - - -def dask_vanilla_biometrics(pipeline, npartitions=None): - """ - Given a :py:class:`VanillaBiometrics`, wraps :py:meth:`VanillaBiometrics.transformer` and - :py:class:`VanillaBiometrics.biometric_algorithm` with Dask delayeds - - Parameters - ---------- - - pipeline: :py:class:`VanillaBiometrics` - Vanilla Biometrics based pipeline to be dasked - - - npartitions: int - Number of partitions for the initial `Dask.bag` - """ - - from bob.pipelines.mixins import estimator_dask_it, mix_me_up - from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin - - transformer = estimator_dask_it(pipeline.transformer, npartitions=npartitions) - biometric_algorithm = mix_me_up([BioAlgDaskMixin], pipeline.biometric_algorithm) - - return VanillaBiometrics(transformer, biometric_algorithm) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py new file mode 100644 index 0000000000000000000000000000000000000000..2574406cbce7211d22d17560c843afaeaf19a400 --- /dev/null +++ b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + + +import os +from bob.pipelines import SampleSet, DelayedSample +from .abstract_classes import ScoreWriter +import functools +import csv + + +class FourColumnsScoreWriter(ScoreWriter): + """ + Read and write scores using the four columns format + :any:`bob.bio.base.score.load.four_column` + """ + + def write(self, probe_sampleset, path): + """ + Write scores and returns a :any:`bob.pipelines.DelayedSample` containing + the instruction to open the score file + """ + os.makedirs(path, exist_ok=True) + checkpointed_scores = [] + + for probe in probe_sampleset: + + lines = [ + "{0} {1} {2} {3}\n".format( + biometric_reference.subject, + probe.subject, + probe.key, + biometric_reference.data, + ) + for biometric_reference in probe + ] + filename = os.path.join(path, str(probe.subject)) + ".txt" + open(filename, "w").writelines(lines) + 
checkpointed_scores.append( + SampleSet( + [ + DelayedSample( + functools.partial(self.read, filename), parent=probe + ) + ], + parent=probe, + ) + ) + return checkpointed_scores + + def read(self, path): + """ + Base Instruction to load a score file + """ + return open(path).readlines() + + def concatenate_write_scores(self, samplesets_list, filename): + """ + Given a list of samplsets, write them all in a single file + """ + os.makedirs(os.path.dirname(filename), exist_ok=True) + f = open(filename, "w") + for samplesets in samplesets_list: + for sset in samplesets: + for s in sset: + f.writelines(s.data) + + +class CSVScoreWriter(ScoreWriter): + """ + Read and write scores in CSV format, shipping all metadata with the scores + + Parameters + ---------- + + n_sample_sets: + Number of samplesets in one chunk + + """ + + def __init__(self, n_sample_sets=1000): + self.n_sample_sets = n_sample_sets + + def write(self, probe_sampleset, path): + """ + Write scores and returns a :any:`bob.pipelines.DelayedSample` containing + the instruction to open the score file + """ + + exclude_list = ["samples", "key", "data", "load", "_data", "references"] + + def create_csv_header(probe_sampleset): + first_biometric_reference = probe_sampleset[0] + + probe_dict = dict( + (k, f"probe_{k}") + for k in probe_sampleset.__dict__.keys() + if k not in exclude_list + ) + + bioref_dict = dict( + (k, f"bio_ref_{k}") + for k in first_biometric_reference.__dict__.keys() + if k not in exclude_list + ) + + header = ( + ["probe_key"] + + [probe_dict[k] for k in probe_dict] + + [bioref_dict[k] for k in bioref_dict] + + ["score"] + ) + return header, probe_dict, bioref_dict + + os.makedirs(path, exist_ok=True) + checkpointed_scores = [] + + header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0]) + + for probe in probe_sampleset: + filename = os.path.join(path, str(probe.subject)) + ".csv" + with open(filename, "w") as f: + + csv_write = csv.writer(f) + csv_write.writerow(header) + + rows = [] + probe_row = [str(probe.key)] + [ + str(probe.__dict__[k]) for k in probe_dict.keys() + ] + + for biometric_reference in probe: + bio_ref_row = [ + str(biometric_reference.__dict__[k]) + for k in list(bioref_dict.keys()) + ["data"] + ] + + rows.append(probe_row + bio_ref_row) + + csv_write.writerows(rows) + checkpointed_scores.append( + SampleSet( + [ + DelayedSample( + functools.partial(self.read, filename), parent=probe + ) + ], + parent=probe, + ) + ) + return checkpointed_scores + + def read(self, path): + """ + Base Instruction to load a score file + """ + return open(path).readlines() + + def concatenate_write_scores(self, samplesets_list, filename): + """ + Given a list of samplsets, write them all in a single file + """ + + # CSV files tends to be very big + # here, here we write them in chunks + + base_dir = os.path.splitext(filename)[0] + os.makedirs(base_dir, exist_ok=True) + f = None + for i, samplesets in enumerate(samplesets_list): + if i% self.n_sample_sets==0: + if f is not None: + f.close() + del f + + filename = os.path.join(base_dir, f"chunk_{i}.csv") + f = open(filename, "w") + + for sset in samplesets: + for s in sset: + if i==0: + f.writelines(s.data) + else: + f.writelines(s.data[1:]) + samplesets_list[i] = None \ No newline at end of file diff --git a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..973039625200b4af245d1b8352637ea1d22fafcb --- /dev/null +++ 
b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py @@ -0,0 +1,193 @@ +from bob.pipelines import DelayedSample +import bob.io.base +import os +import dask +import functools +from .score_writers import FourColumnsScoreWriter +from .abstract_classes import BioAlgorithm + +import bob.pipelines as mario + + +class BioAlgorithmCheckpointWrapper(BioAlgorithm): + """Wrapper used to checkpoint enrolled and Scoring samples. + + Parameters + ---------- + biometric_algorithm: :any:`BioAlgorithm` + An implemented :any:`BioAlgorithm` + + base_dir: str + Path to store biometric references and scores + + extension: str + File extension + + score_writer: :any:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classe.ScoreWriter` + Format to write scores. Default to :any:`FourColumnsScoreWriter` + + force: bool + If True, will recompute scores and biometric references no matter if a file exists + + Examples + -------- + + >>> from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import BioAlgCheckpointWrapper, Distance + >>> biometric_algorithm = BioAlgCheckpointWrapper(Distance(), base_dir="./") + >>> biometric_algorithm.enroll(sample) + + """ + + def __init__( + self, + biometric_algorithm, + base_dir, + score_writer=FourColumnsScoreWriter(), + force=False, + **kwargs + ): + super().__init__(**kwargs) + + self.biometric_reference_dir = os.path.join(base_dir, "biometric_references") + self.score_dir = os.path.join(base_dir, "scores") + self.biometric_algorithm = biometric_algorithm + self.force = force + self._biometric_reference_extension = ".hdf5" + self.score_writer = score_writer + + def enroll(self, enroll_features): + return self.biometric_algorithm.enroll(enroll_features) + + def score(self, biometric_reference, data): + return self.biometric_algorithm.score(biometric_reference, data) + + def score_multiple_biometric_references(self, biometric_references, data): + return self.biometric_algorithm.score_multiple_biometric_references( + biometric_references, data + ) + + def write_biometric_reference(self, sample, path): + return bob.io.base.save(sample.data, path, create_directories=True) + + def _enroll_sample_set(self, sampleset): + """ + Enroll a sample set with checkpointing + """ + + # Amending `models` directory + path = os.path.join( + self.biometric_reference_dir, + str(sampleset.key) + self._biometric_reference_extension, + ) + if self.force or not os.path.exists(path): + + enrolled_sample = self.biometric_algorithm._enroll_sample_set(sampleset) + + # saving the new sample + self.write_biometric_reference(enrolled_sample, path) + + # This seems inefficient, but it's crucial for large datasets + delayed_enrolled_sample = DelayedSample( + functools.partial(bob.io.base.load, path), parent=sampleset + ) + + return delayed_enrolled_sample + + def _score_sample_set( + self, + sampleset, + biometric_references, + allow_scoring_with_all_biometric_references=False, + ): + """Given a sampleset for probing, compute the scores and returns a sample set with the scores + """ + + # TODO: WE CAN'T REUSE THE ALREADY WRITTEN SCORE FILE FOR LOADING + # UNLESS WE SAVE THE PICKLED SAMPLESET WITH THE SCORES + + path = os.path.join(self.score_dir, str(sampleset.key)) + + # Computing score + scored_sample_set = self.biometric_algorithm._score_sample_set( + sampleset, + biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + + scored_sample_set = self.score_writer.write(scored_sample_set, path) + + return scored_sample_set + + +class 
BioAlgorithmDaskWrapper(BioAlgorithm): + def __init__(self, biometric_algorithm, **kwargs): + self.biometric_algorithm = biometric_algorithm + # Copying attribute + if hasattr(biometric_algorithm, "score_writer"): + self.score_writer = biometric_algorithm.score_writer + + def enroll_samples(self, biometric_reference_features): + + biometric_references = biometric_reference_features.map_partitions( + self.biometric_algorithm.enroll_samples + ) + return biometric_references + + def score_samples( + self, + probe_features, + biometric_references, + allow_scoring_with_all_biometric_references=False, + ): + + # TODO: Here, we are sending all computed biometric references to all + # probes. It would be more efficient if only the models related to each + # probe are sent to the probing split. An option would be to use caching + # and allow the ``score`` function above to load the required data from + # the disk, directly. A second option would be to generate named delays + # for each model and then associate them here. + + all_references = dask.delayed(list)(biometric_references) + + scores = probe_features.map_partitions( + self.biometric_algorithm.score_samples, + all_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + return scores + + def enroll(self, data): + return self.biometric_algorithm.enroll(data) + + def score(self, biometric_reference, data): + return self.biometric_algorithm.score(biometric_reference, data) + + def score_multiple_biometric_references(self, biometric_references, data): + return self.biometric_algorithm.score_multiple_biometric_references( + biometric_references, data + ) + + +def dask_vanilla_biometrics(vanila_biometrics_pipeline, npartitions=None): + """ + Given a :any:`VanillaBiometrics`, wraps :any:`VanillaBiometrics.transformer` and + :any:`VanillaBiometrics.biometric_algorithm` to be executed with dask + + Parameters + ---------- + + vanila_biometrics_pipeline: :any:`VanillaBiometrics` + Vanilla Biometrics based pipeline to be dasked + + npartitions: int + Number of partitions for the initial :any:`dask.bag` + """ + + vanila_biometrics_pipeline.transformer = mario.wrap( + ["dask"], vanila_biometrics_pipeline.transformer, npartitions=npartitions + ) + vanila_biometrics_pipeline.biometric_algorithm = BioAlgorithmDaskWrapper( + vanila_biometrics_pipeline.biometric_algorithm + ) + + return vanila_biometrics_pipeline diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py index ea0ad30ec52ce3ea7f6937b890ba2c47f0f97f43..f2f4e71331c995fd938ca06406f97cef46e609a6 100644 --- a/bob/bio/base/script/vanilla_biometrics.py +++ b/bob/bio/base/script/vanilla_biometrics.py @@ -14,6 +14,14 @@ from bob.extension.scripts.click_helper import ( ) import logging +import os +import itertools +import dask.bag +from bob.bio.base.pipelines.vanilla_biometrics import ( + VanillaBiometricsPipeline, + BioAlgorithmCheckpointWrapper, +) + logger = logging.getLogger(__name__) @@ -77,7 +85,6 @@ TODO: Work out this help "-l", required=False, cls=ResourceOption, - entry_point_group="bob.pipelines.client", # This should be linked to bob.bio.base help="Dask client for the execution of the pipeline.", ) @click.option( @@ -97,9 +104,7 @@ TODO: Work out this help help="Name of output directory", ) @verbosity_option(cls=ResourceOption) -def vanilla_biometrics( - pipeline, database, dask_client, groups, output, **kwargs -): +def vanilla_biometrics(pipeline, database, dask_client, groups, output, **kwargs): 
"""Runs the simplest biometrics pipeline. Such pipeline consists into three sub-pipelines. @@ -144,48 +149,47 @@ def vanilla_biometrics( """ - from bob.bio.base.pipelines.vanilla_biometrics.pipeline import VanillaBiometrics - import dask.bag - import itertools - import os - from bob.pipelines.sample import Sample, DelayedSample - if not os.path.exists(output): os.makedirs(output, exist_ok=True) for group in groups: - with open(os.path.join(output, f"scores-{group}"), "w") as f: - biometric_references = database.references(group=group) - - logger.info(f"Running vanilla biometrics for group {group}") - - allow_scoring_with_all_biometric_references = ( - database.allow_scoring_with_all_biometric_references - if hasattr(database, "allow_scoring_with_all_biometric_references") - else False + score_file_name = os.path.join(output, f"scores-{group}.txt") + biometric_references = database.references(group=group) + + logger.info(f"Running vanilla biometrics for group {group}") + allow_scoring_with_all_biometric_references = ( + database.allow_scoring_with_all_biometric_references + if hasattr(database, "allow_scoring_with_all_biometric_references") + else False + ) + + result = pipeline( + database.background_model_samples(), + biometric_references, + database.probes(group=group), + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + + if isinstance(result, dask.bag.core.Bag): + if dask_client is not None: + result = result.compute(scheduler=dask_client) + else: + logger.warning("`dask_client` not set. Your pipeline will run locally") + result = result.compute(scheduler="single-threaded") + + # Check if there's a score writer hooked in + if hasattr(pipeline.biometric_algorithm, "score_writer"): + pipeline.biometric_algorithm.score_writer.concatenate_write_scores( + result, score_file_name ) - - result = pipeline(database.background_model_samples(), - biometric_references, - database.probes(group=group), - allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references - ) - - if isinstance(result, dask.bag.core.Bag): - if dask_client is not None: - result = result.compute(scheduler=dask_client) - else: - logger.warning( - "`dask_client` not set. 
Your pipeline will run locally" - ) - result = result.compute(scheduler="single-threaded") - - # Flatting out the list - result = itertools.chain(*result) - for probe in result: - for sample in probe.samples: - f.write(sample.data) + else: + with open(score_file_name, "w") as f: + # Flatting out the list + result = itertools.chain(*result) + for probe in result: + for sample in probe.samples: + f.writelines(sample.data) if dask_client is not None: dask_client.shutdown() diff --git a/bob/bio/base/test/test_algorithms.py b/bob/bio/base/test/test_algorithms.py index 8b03c5e32599244677fe5e2ed5db437680f7a3b7..c023812e3ba32bf8d9d58e4cd2dae4b81b331a44 100644 --- a/bob/bio/base/test/test_algorithms.py +++ b/bob/bio/base/test/test_algorithms.py @@ -111,7 +111,7 @@ def test_pca(): # compare model with probe probe = pca1.read_feature(pkg_resources.resource_filename('bob.bio.base.test', 'data/pca_projected.hdf5')) reference_score = -251.53563107 - assert abs(pca1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (pca1.score(model, probe), reference_score) + assert abs(numpy.mean(pca1.score(model, probe)) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (pca1.score(model, probe), reference_score) assert abs(pca1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5 # test the calculation of the subspace dimension based on percentage of variance @@ -180,12 +180,11 @@ def test_lda(): # enroll model from random features enroll = utils.random_training_set(5, 5, 0., 255., seed=21) model = lda1.enroll(enroll) - _compare(model, pkg_resources.resource_filename('bob.bio.base.test', 'data/lda_model.hdf5'), lda1.write_model, lda1.read_model) - + _compare(model, pkg_resources.resource_filename('bob.bio.base.test', 'data/lda_model.hdf5'), lda1.write_model, lda1.read_model) # compare model with probe probe = lda1.read_feature(pkg_resources.resource_filename('bob.bio.base.test', 'data/lda_projected.hdf5')) reference_score = -233.30450012 - assert abs(lda1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (lda1.score(model, probe), reference_score) + assert abs(numpy.mean(lda1.score(model, probe)) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (lda1.score(model, probe), reference_score) assert abs(lda1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5 # test the calculation of the subspace dimension based on percentage of variance diff --git a/bob/bio/base/test/test_transformers.py b/bob/bio/base/test/test_transformers.py index 5fa1b098d1f4d92475d570fa098917bd6a5ee6ac..0bd43dcab8ab5e655b346ba893a3b845553bffa3 100644 --- a/bob/bio/base/test/test_transformers.py +++ b/bob/bio/base/test/test_transformers.py @@ -1,68 +1,298 @@ #!/usr/bin/env python # vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -from bob.pipelines.sample import Sample, SampleSet, DelayedSample -import os -import numpy +from bob.bio.base.preprocessor import Preprocessor +from bob.bio.base.extractor import Extractor +from bob.bio.base.algorithm import Algorithm +import scipy +from bob.bio.base.transformers import ( + PreprocessorTransformer, + ExtractorTransformer, + AlgorithmTransformer, +) +import bob.pipelines as mario +import numpy as np import tempfile -from sklearn.utils.validation import check_is_fitted +import os +import bob.io.base +from bob.bio.base.wrappers import ( + wrap_checkpoint_preprocessor, + wrap_checkpoint_extractor, + wrap_checkpoint_algorithm, + 
wrap_sample_preprocessor, + wrap_sample_extractor, + wrap_sample_algorithm, +) +from sklearn.pipeline import make_pipeline + + +class FakePreprocesor(Preprocessor): + def __call__(self, data, annotations=None): + return data + annotations + + +class FakeExtractor(Extractor): + def __call__(self, data): + return data.flatten() + + +class FakeExtractorFittable(Extractor): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.requires_training = True + self.model = None + + def __call__(self, data, metadata=None): + model = self.model + return data @ model + + def train(self, training_data, extractor_file): + self.model = np.vstack(training_data) + bob.io.base.save(self.model, extractor_file) + + +class FakeAlgorithm(Algorithm): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.requires_projector_training = True + self.split_training_features_by_client = True + self.model = None + + def project(self, data): + return data + self.model + + def train_projector(self, training_features, projector_file): + self.model = np.sum(np.vstack(training_features), axis=0) + bob.io.base.save(self.model, projector_file) + + def load_projector(self, projector_file): + self.model = bob.io.base.load(projector_file) + + def enroll(self, enroll_features): + return np.mean(enroll_features, axis=0) + + def score(self, model, data): + return scipy.spatial.distance.euclidean(model, data) + + +def generate_samples(n_subjects, n_samples_per_subject, shape=(2, 2), annotations=1): + """ + Simple sample generator that generates a certain number of samples per + subject, whose data is np.zeros + subject index + """ + + samples = [] + for i in range(n_subjects): + data = np.zeros(shape) + i + for j in range(n_samples_per_subject): + samples += [ + mario.Sample( + data, + subject=str(i), + key=str(i * n_subjects + j), + annotations=annotations, + ) + ] + return samples + + +def assert_sample(transformed_sample, oracle): + return np.alltrue( + [np.allclose(ts.data, o) for ts, o in zip(transformed_sample, oracle)] + ) + + +def assert_checkpoints(transformed_sample, dir_name): + return np.alltrue( + [ + os.path.exists(os.path.join(dir_name, ts.key + ".h5")) + for ts in transformed_sample + ] + ) + + +def test_preprocessor(): + + preprocessor = FakePreprocesor() + preprocessor_transformer = PreprocessorTransformer(preprocessor) + + # Testing sample + transform_extra_arguments = [("annotations", "annotations")] + sample_transformer = mario.SampleWrapper( + preprocessor_transformer, transform_extra_arguments + ) + + data = np.zeros((2, 2)) + oracle = [np.ones((2, 2))] + annotations = 1 + sample = [mario.Sample(data, key="1", annotations=annotations)] + transformed_sample = sample_transformer.transform(sample) + + assert assert_sample(transformed_sample, oracle) + + # Testing checkpoint + with tempfile.TemporaryDirectory() as dir_name: + checkpointing_transformer = mario.CheckpointWrapper( + sample_transformer, + features_dir=dir_name, + load_func=preprocessor.read_data, + save_func=preprocessor.write_data, + ) + transformed_sample = checkpointing_transformer.transform(sample) + + assert assert_sample(transformed_sample, oracle) + assert assert_checkpoints(transformed_sample, dir_name) + + +def test_extractor(): + + extractor = FakeExtractor() + extractor_transformer = ExtractorTransformer(extractor) + + # Testing sample + sample_transformer = mario.SampleWrapper(extractor_transformer) + + data = np.zeros((2, 2)) + oracle = [np.zeros((1, 4))] + sample = [mario.Sample(data, key="1")] + 
transformed_sample = sample_transformer.transform(sample) + + assert assert_sample(transformed_sample, oracle) + + # Testing checkpoint + with tempfile.TemporaryDirectory() as dir_name: + checkpointing_transformer = mario.CheckpointWrapper( + sample_transformer, + features_dir=dir_name, + load_func=extractor.read_feature, + save_func=extractor.write_feature, + ) + transformed_sample = checkpointing_transformer.transform(sample) + + assert assert_sample(transformed_sample, oracle) + assert assert_checkpoints(transformed_sample, dir_name) + + +def test_extractor_fittable(): + + with tempfile.TemporaryDirectory() as dir_name: + + extractor_file = os.path.join(dir_name, "Extractor.hdf5") + extractor = FakeExtractorFittable() + extractor_transformer = ExtractorTransformer( + extractor, model_path=extractor_file + ) + + # Testing sample + sample_transformer = mario.SampleWrapper(extractor_transformer) + # Fitting + training_data = np.arange(4).reshape(2, 2) + training_samples = [mario.Sample(training_data, key="1")] + sample_transformer = sample_transformer.fit(training_samples) + + test_data = [np.zeros((2, 2)), np.ones((2, 2))] + oracle = [np.zeros((2, 2)), np.ones((2, 2)) @ training_data] + test_sample = [mario.Sample(d, key=str(i)) for i, d in enumerate(test_data)] + + transformed_sample = sample_transformer.transform(test_sample) + assert assert_sample(transformed_sample, oracle) -from bob.pipelines.transformers import Linearize, SampleLinearize, CheckpointSampleLinearize -def test_linearize_processor(): + # Testing checkpoint + checkpointing_transformer = mario.CheckpointWrapper( + sample_transformer, + features_dir=dir_name, + load_func=extractor.read_feature, + save_func=extractor.write_feature, + ) + transformed_sample = checkpointing_transformer.transform(test_sample) + assert assert_sample(transformed_sample, oracle) + assert assert_checkpoints(transformed_sample, dir_name) - ## Test the transformer only - transformer = Linearize() - X = numpy.zeros(shape=(10,10)) - X_tr = transformer.transform(X) - assert X_tr.shape == (100,) +def test_algorithm(): - ## Test wrapped in to a Sample - sample = Sample(X, key="1") - transformer = SampleLinearize() - X_tr = transformer.transform([sample]) - assert X_tr[0].data.shape == (100,) + with tempfile.TemporaryDirectory() as dir_name: - ## Test checkpoint - with tempfile.TemporaryDirectory() as d: - transformer = CheckpointSampleLinearize(features_dir=d) - X_tr = transformer.transform([sample]) - assert X_tr[0].data.shape == (100,) - assert os.path.exists(os.path.join(d, "1.h5")) + projector_file = os.path.join(dir_name, "Projector.hdf5") + projector_pkl = os.path.join(dir_name, "Projector.pkl") # Testing pickling + algorithm = FakeAlgorithm() + algorithm_transformer = AlgorithmTransformer( + algorithm, projector_file=projector_file + ) -from bob.pipelines.transformers import SamplePCA, CheckpointSamplePCA -def test_pca_processor(): + # Testing sample + fit_extra_arguments = [("y", "subject")] + sample_transformer = mario.SampleWrapper( + algorithm_transformer, fit_extra_arguments=fit_extra_arguments + ) - ## Test wrapped in to a Sample - X = numpy.random.rand(100,10) - samples = [Sample(data, key=str(i)) for i, data in enumerate(X)] + n_subjects = 2 + n_samples_per_subject = 2 + shape = (2, 2) + training_samples = generate_samples( + n_subjects, n_samples_per_subject, shape=shape + ) + sample_transformer = sample_transformer.fit(training_samples) - # fit - n_components = 2 - estimator = SamplePCA(n_components=n_components) - estimator = 
estimator.fit(samples) + oracle = np.zeros(shape) + n_subjects + test_sample = generate_samples(1, 1) + transformed_sample = sample_transformer.transform(test_sample) + assert assert_sample(transformed_sample, [oracle]) + assert os.path.exists(projector_file) - # https://scikit-learn.org/stable/modules/generated/sklearn.utils.validation.check_is_fitted.html - assert check_is_fitted(estimator, "n_components_") is None + # Testing checkpoint + checkpointing_transformer = mario.CheckpointWrapper( + sample_transformer, + features_dir=dir_name, + load_func=algorithm.read_feature, + save_func=algorithm.write_feature, + model_path=projector_pkl, + ) + # Fitting again to assert if it loads again + checkpointing_transformer = checkpointing_transformer.fit(training_samples) + transformed_sample = checkpointing_transformer.transform(test_sample) - # transform - samples_tr = estimator.transform(samples) - assert samples_tr[0].data.shape == (n_components,) + # Fitting again + assert assert_sample(transformed_sample, oracle) + transformed_sample = checkpointing_transformer.transform(test_sample) + assert assert_checkpoints(transformed_sample, dir_name) + assert os.path.exists(projector_pkl) - ## Test Checkpoining - with tempfile.TemporaryDirectory() as d: - model_path = os.path.join(d, "model.pkl") - estimator = CheckpointSamplePCA(n_components=n_components, features_dir=d, model_path=model_path) +def test_wrap_bob_pipeline(): + def run_pipeline(with_dask, with_checkpoint): + with tempfile.TemporaryDirectory() as dir_name: + if with_checkpoint: + pipeline = make_pipeline( + wrap_checkpoint_preprocessor(FakePreprocesor(), dir_name,), + wrap_checkpoint_extractor(FakeExtractor(), dir_name,), + wrap_checkpoint_algorithm(FakeAlgorithm(), dir_name), + ) + else: + pipeline = make_pipeline( + wrap_sample_preprocessor(FakePreprocesor()), + wrap_sample_extractor(FakeExtractor(), dir_name,), + wrap_sample_algorithm(FakeAlgorithm(), dir_name), + ) - # fit - estimator = estimator.fit(samples) - assert check_is_fitted(estimator, "n_components_") is None - assert os.path.exists(model_path) + oracle = [7.0, 7.0, 7.0, 7.0] + training_samples = generate_samples(n_subjects=2, n_samples_per_subject=2) + test_samples = generate_samples(n_subjects=1, n_samples_per_subject=1) + if with_dask: + pipeline = mario.wrap(["dask"], pipeline) + transformed_samples = ( + pipeline.fit(training_samples) + .transform(test_samples) + .compute(scheduler="single-threaded") + ) + else: + transformed_samples = pipeline.fit(training_samples).transform( + test_samples + ) + assert assert_sample(transformed_samples, oracle) - # transform - samples_tr = estimator.transform(samples) - assert samples_tr[0].data.shape == (n_components,) - assert os.path.exists(os.path.join(d, samples_tr[0].key+".h5")) + run_pipeline(False, False) + run_pipeline(False, True) + run_pipeline(True, False) + run_pipeline(True, True) diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py index 75d9354860926ed739c382eb590252dc87ecf0ed..ff525ab9e06c428852da76ab889af0b3a3cbff14 100644 --- a/bob/bio/base/test/test_vanilla_biometrics.py +++ b/bob/bio/base/test/test_vanilla_biometrics.py @@ -2,33 +2,79 @@ # vim: set fileencoding=utf-8 : # @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -from bob.pipelines.sample import Sample, SampleSet, DelayedSample +from bob.pipelines import Sample, SampleSet, DelayedSample import os -import numpy +import numpy as np import tempfile -from sklearn.utils.validation import 
check_is_fitted +from sklearn.pipeline import make_pipeline +from bob.bio.base.wrappers import wrap_bob_legacy +from bob.bio.base.test.test_transformers import ( + FakePreprocesor, + FakeExtractor, + FakeAlgorithm, +) +from bob.bio.base.pipelines.vanilla_biometrics import ( + Distance, + VanillaBiometricsPipeline, + BioAlgorithmCheckpointWrapper, + dask_vanilla_biometrics, + FourColumnsScoreWriter, + CSVScoreWriter, + BioAlgorithmLegacy, +) +import bob.pipelines as mario +import uuid +import shutil +import itertools -class DummyDatabase: - def __init__(self, delayed=False, n_references=10, n_probes=10, dim=10, one_d = True): +class DummyDatabase: + def __init__(self, delayed=False, n_references=10, n_probes=10, dim=10, one_d=True): self.delayed = delayed self.dim = dim self.n_references = n_references self.n_probes = n_probes self.one_d = one_d - + self.gender_choices = ["M", "F"] + self.metadata_1_choices = ["A", "B", "C"] def _create_random_1dsamples(self, n_samples, offset, dim): - return [ Sample(numpy.random.rand(dim), key=i) for i in range(offset,offset+n_samples) ] + return [ + Sample( + np.random.rand(dim), + key=str(uuid.uuid4()), + annotations=1, + subject=str(i), + ) + for i in range(offset, offset + n_samples) + ] def _create_random_2dsamples(self, n_samples, offset, dim): - return [ Sample(numpy.random.rand(dim, dim), key=i) for i in range(offset,offset+n_samples) ] + return [ + Sample( + np.random.rand(dim, dim), + key=str(uuid.uuid4()), + annotations=1, + subject=str(i), + ) + for i in range(offset, offset + n_samples) + ] def _create_random_sample_set(self, n_sample_set=10, n_samples=2): # Just generate random samples - sample_set = [SampleSet(samples=[], key=i) for i in range(n_sample_set)] + np.random.seed(10) + sample_set = [ + SampleSet( + samples=[], + key=str(i), + subject=str(i), + gender=np.random.choice(self.gender_choices), + metadata_1=np.random.choice(self.metadata_1_choices), + ) + for i in range(n_sample_set) + ] offset = 0 for s in sample_set: @@ -42,41 +88,209 @@ class DummyDatabase: return sample_set - def background_model_samples(self): - return self._create_random_sample_set() - + samples = [sset.samples for sset in self._create_random_sample_set()] + return list(itertools.chain(*samples)) def references(self): return self._create_random_sample_set(self.n_references, self.dim) - def probes(self): - probes = self._create_random_sample_set(self.n_probes, self.dim) + probes = [] + + probes = self._create_random_sample_set(n_sample_set=10, n_samples=1) for p in probes: p.references = list(range(self.n_references)) + return probes + @property + def allow_scoring_with_all_biometric_references(self): + return True -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance -import itertools -def test_distance_comparator(): - - n_references = 10 - dim = 10 - n_probes = 10 - database = DummyDatabase(delayed=False, n_references=n_references, n_probes=n_probes, dim=10, one_d = True) - references = database.references() - probes = database.probes() - - comparator = Distance() - references = comparator.enroll_samples(references) - assert len(references)== n_references - assert references[0].data.shape == (dim,) - - probes = database.probes() - scores = comparator.score_samples(probes, references) - scores = list(itertools.chain(*scores)) - - assert len(scores) == n_probes*n_references - assert len(scores[0].samples)==n_references + +def _make_transformer(dir_name): + pipeline = make_pipeline( + wrap_bob_legacy( + FakePreprocesor(), + dir_name, 
+ transform_extra_arguments=(("annotations", "annotations"),), + ), + wrap_bob_legacy(FakeExtractor(), dir_name,), + ) + + return pipeline + + +def _make_transformer_with_algorithm(dir_name): + pipeline = make_pipeline( + wrap_bob_legacy( + FakePreprocesor(), + dir_name, + transform_extra_arguments=(("annotations", "annotations"),), + ), + wrap_bob_legacy(FakeExtractor(), dir_name), + wrap_bob_legacy(FakeAlgorithm(), dir_name), + ) + + return pipeline + + +def test_on_memory(): + + with tempfile.TemporaryDirectory() as dir_name: + + def run_pipeline(with_dask): + database = DummyDatabase() + + transformer = _make_transformer(dir_name) + + biometric_algorithm = Distance() + + vanilla_biometrics_pipeline = VanillaBiometricsPipeline( + transformer, biometric_algorithm + ) + + if with_dask: + vanilla_biometrics_pipeline = dask_vanilla_biometrics( + vanilla_biometrics_pipeline, npartitions=2 + ) + + scores = vanilla_biometrics_pipeline( + database.background_model_samples(), + database.references(), + database.probes(), + allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references, + ) + + if with_dask: + scores = scores.compute(scheduler="single-threaded") + + assert len(scores) == 10 + for probe_ssets in scores: + for probe in probe_ssets: + assert len(probe) == 10 + + run_pipeline(False) + run_pipeline(False) # Testing checkpoint + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) + run_pipeline(True) + run_pipeline(True) # Testing checkpoint + + +def test_checkpoint_bioalg_as_transformer(): + + with tempfile.TemporaryDirectory() as dir_name: + + def run_pipeline(with_dask, score_writer=FourColumnsScoreWriter()): + database = DummyDatabase() + + transformer = _make_transformer(dir_name) + + biometric_algorithm = BioAlgorithmCheckpointWrapper( + Distance(), base_dir=dir_name, score_writer=score_writer + ) + + vanilla_biometrics_pipeline = VanillaBiometricsPipeline( + transformer, biometric_algorithm + ) + + if with_dask: + vanilla_biometrics_pipeline = dask_vanilla_biometrics( + vanilla_biometrics_pipeline, npartitions=2 + ) + + scores = vanilla_biometrics_pipeline( + database.background_model_samples(), + database.references(), + database.probes(), + allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references, + ) + if with_dask: + scores = scores.compute(scheduler="single-threaded") + + if isinstance(score_writer, CSVScoreWriter): + base_path = os.path.join(dir_name, "concatenated_scores") + score_writer.concatenate_write_scores(scores, base_path) + assert ( + len(open(os.path.join(base_path, "chunk_0.csv")).readlines()) == 101 + ) + else: + filename = os.path.join(dir_name, "concatenated_scores.txt") + score_writer.concatenate_write_scores(scores, filename) + assert len(open(filename).readlines()) == 100 + + run_pipeline(False) + run_pipeline(False) # Checking if the checkpointng works + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) + + # Dask + run_pipeline(True) + run_pipeline(True) # Checking if the checkpointng works + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) + + # CSVWriter + run_pipeline(False, CSVScoreWriter()) + run_pipeline(False, CSVScoreWriter()) # Checking if the checkpointng works + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) + + # 
CSVWriter + Dask + run_pipeline(True, CSVScoreWriter()) + run_pipeline(True, CSVScoreWriter()) # Checking if the checkpointng works + + +def test_checkpoint_bioalg_as_bioalg(): + + with tempfile.TemporaryDirectory() as dir_name: + + def run_pipeline(with_dask, score_writer=FourColumnsScoreWriter()): + database = DummyDatabase() + + transformer = _make_transformer_with_algorithm(dir_name) + projector_file = transformer[2].estimator.estimator.projector_file + + biometric_algorithm = BioAlgorithmLegacy( + FakeAlgorithm(), + base_dir=dir_name, + score_writer=score_writer, + projector_file=projector_file, + ) + + vanilla_biometrics_pipeline = VanillaBiometricsPipeline( + transformer, biometric_algorithm + ) + + if with_dask: + vanilla_biometrics_pipeline = dask_vanilla_biometrics( + vanilla_biometrics_pipeline, npartitions=2 + ) + + scores = vanilla_biometrics_pipeline( + database.background_model_samples(), + database.references(), + database.probes(), + allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references, + ) + + filename = os.path.join(dir_name, "concatenated_scores.txt") + score_writer.concatenate_write_scores(scores, filename) + + if isinstance(score_writer, CSVScoreWriter): + assert len(open(filename).readlines()) == 101 + else: + assert len(open(filename).readlines()) == 100 + + run_pipeline(False) + run_pipeline(False) # Checking if the checkpointng works + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) + + # Dask + run_pipeline(True) + run_pipeline(True) # Checking if the checkpointng works + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) diff --git a/bob/bio/base/transformers/__init__.py b/bob/bio/base/transformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4893224a141f179011ceea7b35e2df51af3d94c4 --- /dev/null +++ b/bob/bio/base/transformers/__init__.py @@ -0,0 +1,18 @@ +# see https://docs.python.org/3/library/pkgutil.html +from pkgutil import extend_path + +__path__ = extend_path(__path__, __name__) + +from collections import defaultdict +def split_X_by_y(X, y): + training_data = defaultdict(list) + for x1, y1 in zip(X, y): + training_data[y1].append(x1) + training_data = list(training_data.values()) + return training_data + + + +from .preprocessor import PreprocessorTransformer +from .extractor import ExtractorTransformer +from .algorithm import AlgorithmTransformer diff --git a/bob/bio/base/transformers/algorithm.py b/bob/bio/base/transformers/algorithm.py new file mode 100644 index 0000000000000000000000000000000000000000..68e55d3846597e97f2ac47e4f91f539e2d1b6b07 --- /dev/null +++ b/bob/bio/base/transformers/algorithm.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +from sklearn.base import TransformerMixin, BaseEstimator +from bob.bio.base.algorithm import Algorithm +from bob.pipelines.utils import is_picklable +from . 
import split_X_by_y +import os + + +class AlgorithmTransformer(TransformerMixin, BaseEstimator): + """Class that wraps :any:`bob.bio.base.algorithm.Algorithm` + + :any:`AlgorithmTransformer.fit` maps to :any:`bob.bio.base.algorithm.Algorithm.train_projector` + + :any:`AlgorithmTransformer.transform` maps to :any:`bob.bio.base.algorithm.Algorithm.project` + + Example + ------- + + Wrapping the LDA algorithm: + >>> from bob.bio.base.transformers import AlgorithmTransformer + >>> from bob.bio.base.algorithm import LDA + >>> transformer = AlgorithmTransformer(LDA(use_pinv=True, pca_subspace_dimension=0.90), projector_file="projector.hdf5") + + + Parameters + ---------- + instance: object + An instance of bob.bio.base.algorithm.Algorithm + + """ + + def __init__( + self, instance, projector_file=None, **kwargs, + ): + + if not isinstance(instance, Algorithm): + raise ValueError( + "`instance` should be an instance of `bob.bio.base.algorithm.Algorithm`" + ) + + if instance.requires_projector_training and ( + projector_file is None or projector_file == "" + ): + raise ValueError( + f"`projector_file` needs to be set if algorithm {instance} requires training" + ) + + if not is_picklable(instance): + raise ValueError(f"{instance} needs to be picklable") + + self.instance = instance + self.projector_file = projector_file + super().__init__(**kwargs) + + def fit(self, X, y=None): + if not self.instance.requires_projector_training: + return self + training_data = X + if self.instance.split_training_features_by_client: + training_data = split_X_by_y(X, y) + + os.makedirs(os.path.dirname(self.projector_file), exist_ok=True) + self.instance.train_projector(training_data, self.projector_file) + return self + + def transform(self, X, metadata=None): + if metadata is None: + return [self.instance.project(data) for data in X] + else: + return [ + self.instance.project(data, metadata) + for data, metadata in zip(X, metadata) + ] + + def _more_tags(self): + return { + "stateless": not self.instance.requires_projector_training, + "requires_fit": self.instance.requires_projector_training, + } diff --git a/bob/bio/base/transformers/extractor.py b/bob/bio/base/transformers/extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..c25bcfd942f4a2c43a68469dbc82eededaa8e817 --- /dev/null +++ b/bob/bio/base/transformers/extractor.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +from sklearn.base import TransformerMixin, BaseEstimator +from bob.bio.base.extractor import Extractor +from . import split_X_by_y + + +class ExtractorTransformer(TransformerMixin, BaseEstimator): + """ + Scikit learn transformer for :any:`bob.bio.base.extractor.Extractor`. 
+ + Parameters + ---------- + + instance: object + An instance of `bob.bio.base.extractor.Extractor` + + model_path: ``str`` + Model path in case :any:`bob.bio.base.extractor.Extractor.requires_training` is equals to true + + """ + + def __init__( + self, instance, model_path=None, **kwargs, + ): + + if not isinstance(instance, Extractor): + raise ValueError( + "`instance` should be an instance of `bob.bio.base.extractor.Extractor`" + ) + + if instance.requires_training and (model_path is None or model_path == ""): + raise ValueError( + f"`model_path` needs to be set if extractor {instance} requires training" + ) + + self.instance = instance + self.model_path = model_path + super().__init__(**kwargs) + + def fit(self, X, y=None): + if not self.instance.requires_training: + return self + + training_data = X + if self.instance.split_training_data_by_client: + training_data = split_X_by_y(X, y) + + self.instance.train(training_data, self.model_path) + return self + + def transform(self, X, metadata=None): + if metadata is None: + return [self.instance(data) for data in X] + else: + return [ + self.instance(data, metadata) for data, metadata in zip(X, metadata) + ] + + def _more_tags(self): + return { + "stateless": not self.instance.requires_training, + "requires_fit": self.instance.requires_training, + } diff --git a/bob/bio/base/transformers/preprocessor.py b/bob/bio/base/transformers/preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..1b1e15c2a2c6d4552e791a70b642db6d0653faeb --- /dev/null +++ b/bob/bio/base/transformers/preprocessor.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +from sklearn.base import TransformerMixin, BaseEstimator +from bob.bio.base.preprocessor import Preprocessor + +class PreprocessorTransformer(TransformerMixin, BaseEstimator): + """ + Scikit learn transformer for :any:`bob.bio.base.preprocessor.Preprocessor`. 
+ + Parameters + ---------- + + instance: object + An instance of `bob.bio.base.preprocessor.Preprocessor` + + + """ + + def __init__( + self, + instance, + **kwargs, + ): + + if not isinstance(instance, Preprocessor): + raise ValueError("`instance` should be an instance of `bob.bio.base.preprocessor.Preprocessor`") + + self.instance = instance + super().__init__(**kwargs) + + def transform(self, X, annotations=None): + if annotations is None: + return [self.instance(data) for data in X] + else: + return [self.instance(data, annot) for data, annot in zip(X, annotations)] + + def _more_tags(self): + return {"stateless": True, "requires_fit": False} diff --git a/bob/bio/base/wrappers.py b/bob/bio/base/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..2123ddde73b31d978992b28713841776447c13e8 --- /dev/null +++ b/bob/bio/base/wrappers.py @@ -0,0 +1,433 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +from bob.bio.base.transformers import ( + PreprocessorTransformer, + ExtractorTransformer, + AlgorithmTransformer, +) +from bob.bio.base.preprocessor import Preprocessor +from bob.bio.base.extractor import Extractor +from bob.bio.base.algorithm import Algorithm +import bob.pipelines as mario +import os +from bob.bio.base.utils import is_argument_available + + +def wrap_bob_legacy( + bob_object, + dir_name, + fit_extra_arguments=None, + transform_extra_arguments=None, + dask_it=False, + **kwargs +): + """ + Wraps either :any:`bob.bio.base.preprocessor.Preprocessor`, :any:`bob.bio.base.extractor.Extractor` + or :any:`bob.bio.base.algorithm.Algorithm` with :any:`sklearn.base.TransformerMixin` + and :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper` + + + Parameters + ---------- + + bob_object: object + Instance of :any:`bob.bio.base.preprocessor.Preprocessor`, :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm` + + dir_name: str + Directory name for the checkpoints + + fit_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.fit_extra_arguments` + + transform_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + dask_it: bool + If True, the transformer will be a dask graph + + """ + + if isinstance(bob_object, Preprocessor): + transformer = wrap_checkpoint_preprocessor( + bob_object, features_dir=os.path.join(dir_name, "preprocessor"), + **kwargs + ) + elif isinstance(bob_object, Extractor): + transformer = wrap_checkpoint_extractor( + bob_object, + features_dir=os.path.join(dir_name, "extractor"), + model_path=dir_name, + fit_extra_arguments=fit_extra_arguments, + transform_extra_arguments=transform_extra_arguments, + **kwargs + ) + elif isinstance(bob_object, Algorithm): + transformer = wrap_checkpoint_algorithm( + bob_object, + features_dir=os.path.join(dir_name, "algorithm"), + model_path=dir_name, + fit_extra_arguments=fit_extra_arguments, + transform_extra_arguments=transform_extra_arguments, + **kwargs + ) + else: + raise ValueError( + "`bob_object` should be an instance of `Preprocessor`, `Extractor` and `Algorithm`" + ) + + if dask_it: + transformer = mario.wrap(["dask"], transformer) + + return transformer + + +def wrap_sample_preprocessor( + preprocessor, + transform_extra_arguments=(("annotations", "annotations"),), + **kwargs +): + """ + Wraps :any:`bob.bio.base.preprocessor.Preprocessor` with + :any:`bob.pipelines.wrappers.CheckpointWrapper` and 
:any:`bob.pipelines.wrappers.SampleWrapper` + + .. warning:: + This wrapper doesn't checkpoint data + + Parameters + ---------- + + preprocessor: :any:`bob.bio.base.preprocessor.Preprocessor` + Instance of :any:`bob.bio.base.transformers.PreprocessorTransformer` to be wrapped + + transform_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + """ + + transformer = PreprocessorTransformer(preprocessor) + return mario.wrap( + ["sample"], + transformer, + transform_extra_arguments=transform_extra_arguments, + ) + + +def wrap_checkpoint_preprocessor( + preprocessor, + features_dir=None, + transform_extra_arguments=(("annotations", "annotations"),), + load_func=None, + save_func=None, + extension=".hdf5", +): + """ + Wraps :any:`bob.bio.base.preprocessor.Preprocessor` with + :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper` + + Parameters + ---------- + + preprocessor: :any:`bob.bio.base.preprocessor.Preprocessor` + Instance of :any:`bob.bio.base.transformers.PreprocessorTransformer` to be wrapped + + features_dir: str + Features directory to be checkpointed (see :any:bob.pipelines.CheckpointWrapper`). + + extension : str, optional + Extension o preprocessed files (see :any:bob.pipelines.CheckpointWrapper`). + + load_func : None, optional + Function that loads data to be preprocessed. + The default is :any:`bob.bio.base.preprocessor.Preprocessor.read_data` + + save_func : None, optional + Function that saves preprocessed data. + The default is :any:`bob.bio.base.preprocessor.Preprocessor.write_data` + + transform_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + """ + + transformer = PreprocessorTransformer(preprocessor) + return mario.wrap( + ["sample", "checkpoint"], + transformer, + load_func=load_func or preprocessor.read_data, + save_func=save_func or preprocessor.write_data, + features_dir=features_dir, + transform_extra_arguments=transform_extra_arguments, + extension=extension, + ) + + +def _prepare_extractor_sample_args( + extractor, transform_extra_arguments, fit_extra_arguments +): + if transform_extra_arguments is None and is_argument_available( + "metadata", extractor.__call__ + ): + transform_extra_arguments = (("metadata", "metadata"),) + + if ( + fit_extra_arguments is None + and extractor.requires_training + and extractor.split_training_data_by_client + ): + fit_extra_arguments = (("y", "subject"),) + + return transform_extra_arguments, fit_extra_arguments + + +def wrap_sample_extractor( + extractor, + fit_extra_arguments=None, + transform_extra_arguments=None, + model_path=None, + **kwargs, +): + """ + Wraps :any:`bob.bio.base.extractor.Extractor` with + :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper` + + Parameters + ---------- + + extractor: :any:`bob.bio.base.extractor.Preprocessor` + Instance of :any:`bob.bio.base.transformers.ExtractorTransformer` to be wrapped + + transform_extra_arguments: [tuple], optional + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + model_path: str + Path to `extractor_file` in :any:`bob.bio.base.extractor.Extractor` + + """ + + extractor_file = ( + os.path.join(model_path, "Extractor.hdf5") if model_path is not None else None + ) + + transformer = ExtractorTransformer(extractor, model_path=extractor_file) + + transform_extra_arguments, fit_extra_arguments = _prepare_extractor_sample_args( + 
extractor, transform_extra_arguments, fit_extra_arguments + ) + + return mario.wrap( + ["sample"], + transformer, + transform_extra_arguments=transform_extra_arguments, + fit_extra_arguments=fit_extra_arguments, + **kwargs, + ) + + +def wrap_checkpoint_extractor( + extractor, + features_dir=None, + fit_extra_arguments=None, + transform_extra_arguments=None, + load_func=None, + save_func=None, + extension=".hdf5", + model_path=None, + **kwargs, +): + """ + Wraps :any:`bob.bio.base.extractor.Extractor` with + :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper` + + Parameters + ---------- + + extractor: :any:`bob.bio.base.extractor.Preprocessor` + Instance of :any:`bob.bio.base.transformers.ExtractorTransformer` to be wrapped + + features_dir: str + Features directory to be checkpointed (see :any:bob.pipelines.CheckpointWrapper`). + + extension : str, optional + Extension o preprocessed files (see :any:bob.pipelines.CheckpointWrapper`). + + load_func : None, optional + Function that loads data to be preprocessed. + The default is :any:`bob.bio.base.extractor.Extractor.read_feature` + + save_func : None, optional + Function that saves preprocessed data. + The default is :any:`bob.bio.base.extractor.Extractor.write_feature` + + fit_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.fit_extra_arguments` + + transform_extra_arguments: [tuple], optional + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + model_path: str + See :any:`TransformerExtractor`. + + """ + + extractor_file = ( + os.path.join(model_path, "Extractor.hdf5") if model_path is not None else None + ) + + model_file = ( + os.path.join(model_path, "Extractor.pkl") if model_path is not None else None + ) + transformer = ExtractorTransformer(extractor, model_path=extractor_file) + + transform_extra_arguments, fit_extra_arguments = _prepare_extractor_sample_args( + extractor, transform_extra_arguments, fit_extra_arguments + ) + + return mario.wrap( + ["sample", "checkpoint"], + transformer, + load_func=load_func or extractor.read_feature, + save_func=save_func or extractor.write_feature, + model_path=model_file, + features_dir=features_dir, + transform_extra_arguments=transform_extra_arguments, + fit_extra_arguments=fit_extra_arguments, + **kwargs, + ) + + +def _prepare_algorithm_sample_args( + algorithm, transform_extra_arguments, fit_extra_arguments +): + + if transform_extra_arguments is None and is_argument_available( + "metadata", algorithm.project + ): + transform_extra_arguments = (("metadata", "metadata"),) + + if ( + fit_extra_arguments is None + and algorithm.requires_projector_training + and algorithm.split_training_features_by_client + ): + fit_extra_arguments = (("y", "subject"),) + + return transform_extra_arguments, fit_extra_arguments + + +def wrap_sample_algorithm( + algorithm, + model_path=None, + fit_extra_arguments=None, + transform_extra_arguments=None, + **kwargs, +): + """ + Wraps :any:`bob.bio.base.algorithm.Algorithm` with + :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper` + + Parameters + ---------- + + algorithm_transformer: :any:`bob.bio.base.algorithm.Algorithm` + Instance of :any:`bob.bio.base.transformers.AlgorithmTransformer` to be wrapped + + model_path: str + Path to `projector_file` in :any:`bob.bio.base.algorithm.Algorithm` + + fit_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.fit_extra_arguments` + + 
transform_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + """ + + projector_file = ( + os.path.join(model_path, "Projector.hdf5") if model_path is not None else None + ) + + transformer = AlgorithmTransformer(algorithm, projector_file=projector_file) + + transform_extra_arguments, fit_extra_arguments = _prepare_algorithm_sample_args( + algorithm, transform_extra_arguments, fit_extra_arguments + ) + + return mario.wrap( + ["sample"], + transformer, + transform_extra_arguments=transform_extra_arguments, + fit_extra_arguments=fit_extra_arguments, + ) + + +def wrap_checkpoint_algorithm( + algorithm, + model_path=None, + features_dir=None, + extension=".hdf5", + fit_extra_arguments=None, + transform_extra_arguments=None, + load_func=None, + save_func=None, + **kwargs, +): + """ + Wraps :any:`bob.bio.base.algorithm.Algorithm` with + :any:`bob.pipelines.wrappers.CheckpointWrapper` and :any:`bob.pipelines.wrappers.SampleWrapper` + + Parameters + ---------- + + algorithm_transformer: :any:`bob.bio.base.algorithm.Algorithm` + Instance of :any:`bob.bio.base.transformers.AlgorithmTransformer` to be wrapped + + features_dir: str + Features directory to be checkpointed (see :any:bob.pipelines.CheckpointWrapper`). + + model_path: str + Path to checkpoint the model + + extension : str, optional + Extension o preprocessed files (see :any:bob.pipelines.CheckpointWrapper`). + + fit_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.fit_extra_arguments` + + transform_extra_arguments: [tuple] + Same behavior as in Check :any:`bob.pipelines.wrappers.transform_extra_arguments` + + load_func : None, optional + Function that loads data to be preprocessed. + The default is :any:`bob.bio.base.extractor.Extractor.read_feature` + + save_func : None, optional + Function that saves preprocessed data. + The default is :any:`bob.bio.base.extractor.Extractor.write_feature` + + + """ + + projector_file = ( + os.path.join(model_path, "Projector.hdf5") if model_path is not None else None + ) + + model_file = ( + os.path.join(model_path, "Projector.pkl") if model_path is not None else None + ) + transformer = AlgorithmTransformer(algorithm, projector_file=projector_file) + + transform_extra_arguments, fit_extra_arguments = _prepare_algorithm_sample_args( + algorithm, transform_extra_arguments, fit_extra_arguments + ) + + return mario.wrap( + ["sample", "checkpoint"], + transformer, + load_func=load_func or algorithm.read_feature, + save_func=save_func or algorithm.write_feature, + model_path=model_file, + features_dir=features_dir, + transform_extra_arguments=transform_extra_arguments, + fit_extra_arguments=fit_extra_arguments, + ) diff --git a/doc/py_api.rst b/doc/py_api.rst index 5ab5c938b54ec087a9d5a895279c41180518f492..d127ce4ddd0973075cb6ef34ebaf40e74559d83d 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -23,8 +23,6 @@ IO-related functions Pipelines ~~~~~~~~~ -.. autosummary:: - bob.bio.base.pipelines.vanilla_biometrics.biometric_pipeline Functions dealing with resources @@ -71,18 +69,13 @@ Plotting bob.bio.base.script.figure.Dir bob.bio.base.script.figure.Hist -OpenBR conversions ------------------- -.. autosummary:: - bob.bio.base.score.openbr.write_matrix - bob.bio.base.score.openbr.write_score_file + Details ------- .. automodule:: bob.bio.base.score.load -.. automodule:: bob.bio.base.score.openbr .. automodule:: bob.bio.base.script.figure .. automodule:: bob.bio.base.script.commands .. 
automodule:: bob.bio.base.script.gen
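
Note (not part of the patch): the pieces added above compose as exercised in test_on_memory. Legacy preprocessors and extractors are wrapped into a scikit-learn pipeline with wrap_bob_legacy, paired with a BioAlgorithm such as Distance inside a VanillaBiometricsPipeline, and optionally turned into a dask graph with dask_vanilla_biometrics. The following is a minimal usage sketch; every name comes from files added or changed in this patch (the fake transformers and the dummy database are the test helpers introduced above), so treat it as an illustration rather than a supported entry point.

import tempfile
from sklearn.pipeline import make_pipeline
from bob.bio.base.wrappers import wrap_bob_legacy
from bob.bio.base.test.test_transformers import FakePreprocesor, FakeExtractor
from bob.bio.base.test.test_vanilla_biometrics import DummyDatabase
from bob.bio.base.pipelines.vanilla_biometrics import (
    Distance,
    VanillaBiometricsPipeline,
    dask_vanilla_biometrics,
)

with tempfile.TemporaryDirectory() as dir_name:
    # Legacy Preprocessor/Extractor wrapped as checkpointed sample transformers
    transformer = make_pipeline(
        wrap_bob_legacy(
            FakePreprocesor(),
            dir_name,
            transform_extra_arguments=(("annotations", "annotations"),),
        ),
        wrap_bob_legacy(FakeExtractor(), dir_name),
    )

    # Distance-based scoring; any BioAlgorithm could be plugged in here
    pipeline = VanillaBiometricsPipeline(transformer, Distance())

    # Optional: execute the whole pipeline as a dask graph
    pipeline = dask_vanilla_biometrics(pipeline, npartitions=2)

    database = DummyDatabase()
    scores = pipeline(
        database.background_model_samples(),
        database.references(),
        database.probes(),
        allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
    )
    # Only needed when the dask wrapper is used
    scores = scores.compute(scheduler="single-threaded")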