From a8746ea4ee6791ef64a9990b1829b3bfe94b5e24 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Sun, 25 Feb 2018 20:57:44 +0100
Subject: [PATCH] Appended the metadata to the algorithm

[sphinx] Documented the metadata support

Fixed an issue in the projector metadata
---
 bob/bio/base/test/dummy/algorithm.py | 27 ++++++++++++++++++++++++++-
 bob/bio/base/test/test_scripts.py    |  2 +-
 bob/bio/base/tools/algorithm.py      | 30 ++++++++++++++++++++++------
 bob/bio/base/tools/scoring.py        | 12 ++++++++----
 doc/implementation.rst               | 21 +++++++++++++++++++++
 setup.py                             |  1 +
 6 files changed, 81 insertions(+), 12 deletions(-)

diff --git a/bob/bio/base/test/dummy/algorithm.py b/bob/bio/base/test/dummy/algorithm.py
index d1d25d48..ab427bae 100644
--- a/bob/bio/base/test/dummy/algorithm.py
+++ b/bob/bio/base/test/dummy/algorithm.py
@@ -1,6 +1,6 @@
 import scipy.spatial
 import bob.io.base
-
+import numpy
 from bob.bio.base.algorithm import Algorithm
 
 _data = [5., 6., 7., 8., 9.]
@@ -57,3 +57,28 @@ class DummyAlgorithm (Algorithm):
     return scipy.spatial.distance.euclidean(model, probe)
 
 algorithm = DummyAlgorithm()
+
+
+class DummyAlgorithmMetadata (DummyAlgorithm):
+
+  def train_projector(self, train_files, projector_file, metadata=None):
+    """Checks that the metadata is passed and delegates the training to the base class."""
+    assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).train_projector(train_files, projector_file)
+
+  def enroll(self, enroll_features, metadata=None):
+    # Checking that all the metadata belong to the same client_id
+    assert numpy.alltrue([metadata[0].client_id == m.client_id for m in metadata])
+    #assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).enroll(enroll_features)
+
+  def score(self, model, probe, metadata=None):
+    """Returns the Euclidean distance between model and probe"""
+    assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).score(model, probe)
+
+  def project(self, feature, metadata=None):
+    assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).project(feature)
+
+algorithm_metadata = DummyAlgorithmMetadata()
diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py
index 60948798..072a431b 100644
--- a/bob/bio/base/test/test_scripts.py
+++ b/bob/bio/base/test/test_scripts.py
@@ -150,7 +150,7 @@ def test_verify_resources_metadata():
       '-d', 'dummy',
       '-p', 'dummy_metadata',
       '-e', 'dummy_metadata',
-      '-a', 'dummy',
+      '-a', 'dummy_metadata',
       '--zt-norm',
       '--allow-missing-files',
       '-vs', 'test_resource',
diff --git a/bob/bio/base/tools/algorithm.py b/bob/bio/base/tools/algorithm.py
index 06a5a1be..ff2d69ff 100644
--- a/bob/bio/base/tools/algorithm.py
+++ b/bob/bio/base/tools/algorithm.py
@@ -2,6 +2,7 @@
 import bob.io.base
 import os
 import logging
+import inspect
 logger = logging.getLogger("bob.bio.base")
 
 from .FileSelector import FileSelector
@@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
     bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
     # train projector
     logger.info("- Projection: loading training data")
+
     train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client)
     train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files)
     if algorithm.split_training_features_by_client:
@@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
       logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))
 
     # perform training
-    algorithm.train_projector(train_features, fs.projector_file)
-
+    if "metadata" in inspect.getargspec(algorithm.train_projector).args:
+      metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client)
+      algorithm.train_projector(train_features, fs.projector_file, metadata=metadata)
+    else:
+      algorithm.train_projector(train_features, fs.projector_file)
 
 
 def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False):
@@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
 
   feature_files = fs.feature_list(groups=groups)
   projected_files = fs.projected_list(groups=groups)
+  metadata = fs.original_data_list(groups=groups)
 
   # select a subset of indices to iterate
   if indices is not None:
@@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
       bob.io.base.create_directories_safe(os.path.dirname(projected_file))
       # load feature
       feature = extractor.read_feature(feature_file)
+
       # project feature
-      projected = algorithm.project(feature)
+      if "metadata" in inspect.getargspec(algorithm.project).args:
+        projected = algorithm.project(feature, metadata=metadata)
+      else:
+        projected = algorithm.project(feature)
 
       if projected is None:
         if allow_missing_files:
@@ -264,8 +274,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
 
       # Removes old file if required
       if not utils.check_file(model_file, force, algorithm.min_model_file_size):
-        enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
+        enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
 
         if allow_missing_files:
           enroll_files = utils.filter_missing_files(enroll_files)
           if not enroll_files:
@@ -280,7 +290,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
         # load all files into memory
         enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files]
 
-        model = algorithm.enroll(enroll_features)
+        if "metadata" in inspect.getargspec(algorithm.enroll).args:
+          metadata = fs.database.enroll_files(group=group, model_id=model_id)
+          model = algorithm.enroll(enroll_features, metadata=metadata)
+        else:
+          model = algorithm.enroll(enroll_features)
 
         if model is None:
           if allow_missing_files:
@@ -327,7 +341,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
         # load all files into memory
         t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files]
 
-        t_model = algorithm.enroll(t_enroll_features)
+        if "metadata" in inspect.getargspec(algorithm.enroll).args:
+          metadata = fs.database.enroll_files(group=group, model_id=t_model_id)
+          t_model = algorithm.enroll(t_enroll_features, metadata=metadata)
+        else:
+          t_model = algorithm.enroll(t_enroll_features)
 
         if t_model is None:
           if allow_missing_files:
diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py
index 1649add9..58f5d817 100644
--- a/bob/bio/base/tools/scoring.py
+++ b/bob/bio/base/tools/scoring.py
@@ -5,12 +5,12 @@
 import bob.measure
 import numpy
 import os, sys
 import tarfile
+import inspect
 import logging
 logger = logging.getLogger("bob.bio.base")
 
 from .FileSelector import FileSelector
-from .extractor import read_features
 from .. import utils
 
 def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
@@ -28,12 +28,12 @@
     return scores
 
   # Loops over the probe sets
-  for i, probe_element in enumerate(probes):
+  for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects):
     if fs.uses_probe_file_sets():
       assert isinstance(probe_element, list)
       # filter missing files
       if allow_missing_files:
-        probe_element = utils.filter_missing_files(probe_element)
+        probe_element = utils.filter_missing_files(probe_element, probe_objects)
         if not probe_element:
           # we keep the NaN score
           continue
@@ -48,7 +48,11 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
       # read probe
       probe = reader.read_feature(probe_element)
      # compute score
-      scores[0,i] = algorithm.score(model, probe)
+      if "metadata" in inspect.getargspec(algorithm.score).args:
+        scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata)
+      else:
+        scores[0, i] = algorithm.score(model, probe)
+
 
   # Returns the scores
   return scores
diff --git a/doc/implementation.rst b/doc/implementation.rst
index 55908a9a..cccdb118 100644
--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -45,6 +45,13 @@ All of them implement the following two functions:
   .. note::
      When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
 
+  .. note::
+     If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the preprocessor.
+     To enable this, add a keyword argument called **metadata** to the signature of ``__call__``.
+     Whenever this keyword is present in the signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
+     instance is passed via this keyword argument.
+
+
 By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
 In that case, the base class IO functionality can be used.
 If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
@@ -72,6 +79,13 @@ All extractor classes provide at least the functions:
 
 * ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
   By default, the returned feature should be a :py:class:`numpy.ndarray`.
 
+  .. note::
+     If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the extractor.
+     To enable this, add a keyword argument called **metadata** to the signature of ``__call__``.
+     Whenever this keyword is present in the signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
+     instance is passed via this keyword argument.
+
+
 If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden.
 In this case, also the function to read that kind of features needs to be overridden:
@@ -180,6 +194,13 @@ These two functions are:
 * ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores.
 * ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior.
 
+  .. note::
+     If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the algorithm.
+     To enable this, add a keyword argument called **metadata** to the signature of ``train_projector``, ``project``, ``enroll`` or ``score``.
+     Whenever this keyword is present in a signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
+     instances are passed via this keyword argument.
+
+
 
 Implemented Tools
 -----------------
diff --git a/setup.py b/setup.py
index 4ddf3c93..d33d06d3 100644
--- a/setup.py
+++ b/setup.py
@@ -106,6 +106,7 @@ setup(
 
       'bob.bio.algorithm': [
         'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
+        'dummy_metadata = bob.bio.base.test.dummy.algorithm:algorithm_metadata',
         'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
         'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm',
         'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm',
-- 
GitLab
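For reviewers who want to see the new keyword from the user's side, the sketch below (not part of the patch) shows a custom algorithm that opts into the metadata argument documented above. Only the Algorithm base class, the metadata keyword, and the client_id attribute are taken from the patch itself; the class name MetadataAwareDistance and the averaging/scoring details are illustrative assumptions. The tools only pass the bob.bio.base.database.BioFile objects because the keyword appears in the method signatures (detected via inspect.getargspec).

    import numpy
    import scipy.spatial

    from bob.bio.base.algorithm import Algorithm


    class MetadataAwareDistance(Algorithm):
      """Illustrative algorithm: enrolls by averaging features and scores by
      Euclidean distance, receiving the optional ``metadata`` keyword."""

      def enroll(self, enroll_features, metadata=None):
        # 'metadata' is only filled because the keyword appears in this signature;
        # it then holds the BioFile objects of the enrollment samples.
        if metadata is not None:
          # same sanity check as the dummy algorithm: one client per model
          assert all(m.client_id == metadata[0].client_id for m in metadata)
        return numpy.mean(numpy.vstack(enroll_features), axis=0)

      def score(self, model, probe, metadata=None):
        # here 'metadata' describes the current probe object, when requested
        return scipy.spatial.distance.euclidean(model, probe)


    # registered like the dummy resource above, e.g. under a hypothetical
    # 'distance-metadata' entry point in setup.py
    algorithm = MetadataAwareDistance()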