Commit cd0700ae authored by Tiago de Freitas Pereira, committed by Amir MOHAMMADI

Cleaning up

parent 4df4e7e5
1 merge request: !180 [dask] Preparing bob.bio.base for dask pipelines
@@ -3,4 +3,4 @@
 import bob.bio.base
 
 # Linearization of the data to a vector, no data type specified
-extractor = bob.bio.base.extractor.Linearize
+extractor = bob.bio.base.extractor.Linearize()
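Reviewer note: the hunk above matters because the config must export an extractor instance, not the class object; downstream code calls methods such as transform() on it directly (as exercised by the new tests further down). A minimal illustration of the difference:

    import bob.bio.base

    # Class object: calls like extractor.transform(data) are unbound and fail.
    extractor = bob.bio.base.extractor.Linearize

    # Instance: extractor.transform(data) behaves as the pipeline expects.
    extractor = bob.bio.base.extractor.Linearize()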
......
 from .linearize import Linearize, SampleLinearize, CheckpointSampleLinearize
-#from .pca import CheckpointSamplePCA, SamplePCA
+from .pca import CheckpointSamplePCA, SamplePCA
@@ -83,12 +83,23 @@ class LegacyProcessorMixin(TransformerMixin):
     def transform(self, X):
-        X = check_array(X, allow_nd=True)
 
         # Instantiates and do the "real" transform
         if self.instance is None:
             self.instance = self.callable()
-        return [self.instance(x) for x in X]
+
+        if isinstance(X[0], dict):
+            # Handling annotations if it's the case
+            retval = []
+            for x in X:
+                data = x["data"]
+                annotations = x["annotations"]
+                retval.append(self.instance(data, annotations=annotations))
+            return retval
+        else:
+            X = check_array(X, allow_nd=True)
+            return [self.instance(x) for x in X]
 from bob.pipelines.mixins import CheckpointMixin, SampleMixin
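Reviewer note: the new transform branch above lets wrapped legacy callables receive annotations. A minimal usage sketch, assuming LegacyProcessorMixin is in scope and that its constructor takes the class to instantiate as callable (both assumptions, since neither is shown in this diff):

    # Hypothetical legacy processor class: instantiated lazily by the mixin,
    # then called with data plus an optional `annotations` keyword.
    class FakeLegacyExtractor:
        def __call__(self, data, annotations=None):
            return data

    wrapper = LegacyProcessorMixin(callable=FakeLegacyExtractor)  # signature assumed

    # Plain samples go through sklearn's check_array:
    features = wrapper.transform([[1.0, 2.0, 3.0]])

    # Dict samples take the new annotation-aware path:
    features = wrapper.transform(
        [{"data": [1.0, 2.0, 3.0], "annotations": {"leye": (10, 20)}}]
    )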
......
@@ -7,7 +7,7 @@
 TODO: This should be deployed in bob.pipelines
 """
 
-from bob.pipelines.processor import CheckpointMixin, SampleMixin
+from bob.pipelines.mixins import CheckpointMixin, SampleMixin
 from sklearn.base import TransformerMixin
 from sklearn.decomposition import PCA
 import numpy
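For context, the corrected import reflects that CheckpointMixin and SampleMixin now live in bob.pipelines.mixins. Their definitions are not part of this diff; the sketch below only illustrates the stacking pattern the test file further down appears to rely on (class names are real, the composition and behavior are assumed):

    from sklearn.decomposition import PCA

    # Illustrative stand-ins; the real mixins come from bob.pipelines.mixins.
    class SampleMixin:
        """Assumed: unwraps Sample.data before delegating fit/transform."""

    class CheckpointMixin:
        """Assumed: persists features and the fitted model to disk."""

    # Assumed composition for the estimator exercised in the tests below.
    class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA):
        pass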
......
@@ -12,6 +12,9 @@
 import dask.bag
 import dask.delayed
 from bob.pipelines.sample import samplesets_to_samples
 
+import logging
+
+logger = logging.getLogger(__name__)
 
 def biometric_pipeline(
     background_model_samples,
@@ -20,16 +23,21 @@ def biometric_pipeline(
     extractor,
     biometric_algorithm,
 ):
+    logger.info(f" >> Vanilla Biometrics: Training background model with pipeline {extractor}")
+
     ## Training background model (fit will return even if samples is ``None``,
     ## in which case we suppose the algorithm is not trainable in any way)
     extractor = train_background_model(background_model_samples, extractor)
 
+    logger.info(f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}")
+
     ## Create biometric samples
     biometric_references = create_biometric_reference(
         biometric_reference_samples, extractor, biometric_algorithm
     )
 
+    logger.info(f" >> Computing scores with the biometric algorithm {biometric_algorithm}")
     ## Scores all probes
     return compute_scores(
         probe_samples, biometric_references, extractor, biometric_algorithm
@@ -38,7 +46,6 @@
 
 def train_background_model(background_model_samples, extractor):
-    # TODO: Maybe here is supervised
     X, y = samplesets_to_samples(background_model_samples)
     extractor = extractor.fit(X, y=y)
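Reviewer note: the logger.info calls added above are silent under Python's default WARNING level; applications that want the progress messages need to opt in, e.g.:

    import logging

    # Show INFO-level progress messages from the pipeline.
    logging.basicConfig(level=logging.INFO)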
......
@@ -171,6 +171,8 @@ def vanilla_biometrics(
         with open(os.path.join(output,f"scores-{g}"), "w") as f:
             biometric_references = database.references(group=g)
 
+            logger.info(f"Running vanilla biometrics for group {g}")
+
             result = biometric_pipeline(
                 database.background_model_samples(),
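Putting the pieces together, biometric_pipeline chains the three stages above: train the background model, enroll references, score probes. A hypothetical driver, assuming database, extractor, and biometric_algorithm objects that provide the interfaces used in the hunks (the probes(group=...) accessor is an assumption):

    scores = biometric_pipeline(
        database.background_model_samples(),  # fit the extractor (no-op if untrainable)
        database.references(group="dev"),     # samples enrolled as biometric references
        database.probes(group="dev"),         # samples scored against those references
        extractor,
        biometric_algorithm,
    )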
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

from bob.pipelines.sample import Sample, SampleSet, DelayedSample
import os
import numpy
import tempfile
from sklearn.utils.validation import check_is_fitted

from bob.bio.base.processor import Linearize, SampleLinearize, CheckpointSampleLinearize

def test_linearize_processor():

    ## Test the transformer only
    transformer = Linearize()
    X = numpy.zeros(shape=(10, 10))
    X_tr = transformer.transform(X)
    assert X_tr.shape == (100,)

    ## Test wrapped into a Sample
    sample = Sample(X, key="1")
    transformer = SampleLinearize()
    X_tr = transformer.transform([sample])
    assert X_tr[0].data.shape == (100,)

    ## Test checkpoint
    with tempfile.TemporaryDirectory() as d:
        transformer = CheckpointSampleLinearize(features_dir=d)
        X_tr = transformer.transform([sample])
        assert X_tr[0].data.shape == (100,)
        assert os.path.exists(os.path.join(d, "1.h5"))

from bob.bio.base.processor import SamplePCA, CheckpointSamplePCA

def test_pca_processor():

    ## Test wrapped into a Sample
    X = numpy.random.rand(100, 10)
    samples = [Sample(data, key=str(i)) for i, data in enumerate(X)]

    # fit
    n_components = 2
    estimator = SamplePCA(n_components=n_components)
    estimator = estimator.fit(samples)

    # https://scikit-learn.org/stable/modules/generated/sklearn.utils.validation.check_is_fitted.html
    assert check_is_fitted(estimator, "n_components_") is None

    # transform
    samples_tr = estimator.transform(samples)
    assert samples_tr[0].data.shape == (n_components,)

    ## Test checkpointing
    with tempfile.TemporaryDirectory() as d:
        model_path = os.path.join(d, "model.pkl")
        estimator = CheckpointSamplePCA(
            n_components=n_components, features_dir=d, model_path=model_path
        )

        # fit
        estimator = estimator.fit(samples)
        assert check_is_fitted(estimator, "n_components_") is None
        assert os.path.exists(model_path)

        # transform
        samples_tr = estimator.transform(samples)
        assert samples_tr[0].data.shape == (n_components,)
        assert os.path.exists(os.path.join(d, samples_tr[0].key + ".h5"))
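A note on the assertion style used in these tests: sklearn's check_is_fitted returns None when the given attribute exists (i.e., the estimator was fitted) and raises NotFittedError otherwise, so `assert check_is_fitted(...) is None` only passes on a fitted estimator. A self-contained illustration:

    import numpy
    from sklearn.decomposition import PCA
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted

    pca = PCA(n_components=2)
    try:
        check_is_fitted(pca, "n_components_")
    except NotFittedError:
        pass  # not fitted yet: check_is_fitted raises

    pca.fit(numpy.random.rand(100, 10))
    assert check_is_fitted(pca, "n_components_") is None  # fitted: returns None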
......
@@ -9,9 +9,6 @@
 import tempfile
 from sklearn.utils.validation import check_is_fitted
 
-
-#from bob.bio.base.processor import Linearize, SampleLinearize, CheckpointSampleLinearize
-
 class DummyDatabase:
     def __init__(self, delayed=False, n_references=10, n_probes=10, dim=10, one_d = True):
@@ -61,7 +58,8 @@
         return probes
 
-from bob.bio.base.pipelines.vanilla_biometrics.comparator import DistanceComparator
+from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance
+import itertools
 
 def test_distance_comparator():
     n_references = 10
@@ -70,40 +68,15 @@ def test_distance_comparator():
     database = DummyDatabase(delayed=False, n_references=n_references, n_probes=n_probes, dim=10, one_d = True)
     references = database.references()
     probes = database.probes()
-    pass
-    comparator = DistanceComparator()
-    references = comparator.enroll_samples(references)
+    comparator = Distance()
+    references = comparator._enroll_samples(references)
     assert len(references)== n_references
     assert references[0].data.shape == (dim,)
 
     probes = database.probes()
-    scores = comparator.score_samples(probes, references)
+    scores = comparator._score_samples(probes, references)
+    scores = list(itertools.chain(*scores))
     assert len(scores) == n_probes*n_references
     assert len(scores[0].samples)==n_references
-    ## Test the transformer only
-    #transformer = Linearize()
-    #X = numpy.zeros(shape=(10,10))
-    #X_tr = transformer.transform(X)
-    #assert X_tr.shape == (100,)
-
-    ## Test wrapped in to a Sample
-    #sample = Sample(X, key="1")
-    #transformer = SampleLinearize()
-    #X_tr = transformer.transform([sample])
-    #assert X_tr[0].data.shape == (100,)
-
-    ## Test checkpoint
-    #with tempfile.TemporaryDirectory() as d:
-    #    transformer = CheckpointSampleLinearize(features_dir=d)
-    #    X_tr = transformer.transform([sample])
-    #    assert X_tr[0].data.shape == (100,)
-    #    assert os.path.exists(os.path.join(d, "1.h5"))
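Reviewer note: _score_samples returns one list of scores per probe, so the new itertools.chain line flattens that nesting before the length check. A quick illustration with placeholder values:

    import itertools

    per_probe = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]  # one inner list per probe
    flat = list(itertools.chain(*per_probe))
    assert len(flat) == 3 * 2  # n_probes * n_references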