diff --git a/bob/bio/base/test/dummy/algorithm.py b/bob/bio/base/test/dummy/algorithm.py index d1d25d487ea756a2ead758fabd266cb64853cd0a..c877cd9a0b6465b3dd893b0a3061b641f667358a 100644 --- a/bob/bio/base/test/dummy/algorithm.py +++ b/bob/bio/base/test/dummy/algorithm.py @@ -1,7 +1,8 @@ import scipy.spatial import bob.io.base - +import numpy from bob.bio.base.algorithm import Algorithm +from bob.bio.base.database import BioFile _data = [5., 6., 7., 8., 9.] @@ -57,3 +58,28 @@ class DummyAlgorithm (Algorithm): return scipy.spatial.distance.euclidean(model, probe) algorithm = DummyAlgorithm() + + +class DummyAlgorithmMetadata (DummyAlgorithm): + + def train_projector(self, train_files, projector_file, metadata=None): + """Does nothing, simply converts the data type of the data, ignoring any annotation.""" + assert isinstance(metadata, list) + return super(DummyAlgorithmMetadata, self).train_projector(train_files, projector_file) + + def enroll(self, enroll_features, metadata=None): + # Cheking if the all the metadata are from the same client_id + assert numpy.alltrue([metadata[0].client_id == m.client_id for m in metadata]) + #assert metadata is not None + return super(DummyAlgorithmMetadata, self).enroll(enroll_features) + + def score(self, model, probe, metadata=None): + """Returns the Euclidean distance between model and probe""" + assert isinstance(metadata, BioFile) + return super(DummyAlgorithmMetadata, self).score(model, probe) + + def project(self, feature, metadata=None): + assert isinstance(metadata, BioFile) + return super(DummyAlgorithmMetadata, self).project(feature) + +algorithm_metadata = DummyAlgorithmMetadata() diff --git a/bob/bio/base/test/dummy/extractor.py b/bob/bio/base/test/dummy/extractor.py index eca7517ca8a46676074f93410d95b98d71757721..2e53464569b5579765c06554797d3e28a2963ee2 100644 --- a/bob/bio/base/test/dummy/extractor.py +++ b/bob/bio/base/test/dummy/extractor.py @@ -1,6 +1,6 @@ import numpy import bob.bio.base - +from 
bob.bio.base.database import BioFile from bob.bio.base.extractor import Extractor _data = [0., 1., 2., 3., 4.] @@ -25,3 +25,13 @@ class DummyExtractor (Extractor): return data.astype(numpy.float).flatten() extractor = DummyExtractor() + + +class DummyExtractorMetadata (DummyExtractor): + + def __call__(self, data, metadata=None): + """Does nothing, simply converts the data type of the data, ignoring any annotation.""" + assert isinstance(metadata, BioFile) + return super(DummyExtractorMetadata, self).__call__(data) + +extractor_metadata = DummyExtractorMetadata() diff --git a/bob/bio/base/test/dummy/preprocessor.py b/bob/bio/base/test/dummy/preprocessor.py index 89376c31d59dd215b321153a6c56dbda75f32e0e..1c14bb1b1c97fbe72d135947d79da7d697193bef 100644 --- a/bob/bio/base/test/dummy/preprocessor.py +++ b/bob/bio/base/test/dummy/preprocessor.py @@ -1,4 +1,5 @@ from bob.bio.base.preprocessor import Preprocessor +from bob.bio.base.database import BioFile import numpy numpy.random.seed(10) @@ -16,5 +17,14 @@ class DummyPreprocessor (Preprocessor): return data - preprocessor = DummyPreprocessor() + + +class DummyPreprocessorMetadata (DummyPreprocessor): + + def __call__(self, data, annotation, metadata=None): + """Does nothing, simply converts the data type of the data, ignoring any annotation.""" + assert isinstance(metadata, BioFile) + return super(DummyPreprocessorMetadata, self).__call__(data, annotation) + +preprocessor_metadata = DummyPreprocessorMetadata() diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py index 05aa6cad343eb45055c58592d6f3457525fa5e1b..072a431b03af1eea9afa23d96ddb211a1ec9e183 100644 --- a/bob/bio/base/test/test_scripts.py +++ b/bob/bio/base/test/test_scripts.py @@ -143,6 +143,25 @@ def test_verify_resources(): _verify(parameters, test_dir, 'test_resource') +def test_verify_resources_metadata(): + test_dir = tempfile.mkdtemp(prefix='bobtest_') + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 
'dummy_metadata', + '-e', 'dummy_metadata', + '-a', 'dummy_metadata', + '--zt-norm', + '--allow-missing-files', + '-vs', 'test_resource', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.base' + ] + + _verify(parameters, test_dir, 'test_resource') + + def test_verify_commandline(): test_dir = tempfile.mkdtemp(prefix='bobtest_') # define dummy parameters diff --git a/bob/bio/base/tools/algorithm.py b/bob/bio/base/tools/algorithm.py index 06a5a1be5b5e0f4f3ce7dc823838cdac1ee8f726..2bbaa558352d0d5ba22f6bf2ce1743ebe19524f5 100644 --- a/bob/bio/base/tools/algorithm.py +++ b/bob/bio/base/tools/algorithm.py @@ -2,6 +2,7 @@ import bob.io.base import os import logging +import inspect logger = logging.getLogger("bob.bio.base") from .FileSelector import FileSelector @@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file)) # train projector logger.info("- Projection: loading training data") + train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client) train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files) if algorithm.split_training_features_by_client: @@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files)) # perform training - algorithm.train_projector(train_features, fs.projector_file) - + if utils.is_argument_available("metadata", algorithm.train_projector): + metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client) + algorithm.train_projector(train_features, fs.projector_file, metadata=metadata) + else: + algorithm.train_projector(train_features, fs.projector_file) def 
project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False): @@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f feature_files = fs.feature_list(groups=groups) projected_files = fs.projected_list(groups=groups) + metadata = fs.original_data_list(groups=groups) # select a subset of indices to iterate if indices is not None: @@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f bob.io.base.create_directories_safe(os.path.dirname(projected_file)) # load feature feature = extractor.read_feature(feature_file) + # project feature - projected = algorithm.project(feature) + if "metadata" in inspect.getargspec(algorithm.project).args: + projected = algorithm.project(feature, metadata=metadata[i]) + else: + projected = algorithm.project(feature) if projected is None: if allow_missing_files: @@ -247,6 +257,9 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev # which tool to use to read the features... 
reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor + # Checking if we need to ship the metadata to the method enroll + has_metadata = utils.is_argument_available("metadata", algorithm.enroll) + # Create Models if 'N' in types: for group in groups: @@ -264,8 +277,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev # Removes old file if required if not utils.check_file(model_file, force, algorithm.min_model_file_size): - enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted') + enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted') if allow_missing_files: enroll_files = utils.filter_missing_files(enroll_files) if not enroll_files: @@ -280,7 +293,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev # load all files into memory enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files] - model = algorithm.enroll(enroll_features) + if has_metadata: + metadata = fs.database.enroll_files(group=group, model_id=model_id) + model = algorithm.enroll(enroll_features, metadata=metadata) + else: + model = algorithm.enroll(enroll_features) if model is None: if allow_missing_files: @@ -327,7 +344,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev # load all files into memory t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files] - t_model = algorithm.enroll(t_enroll_features) + if has_metadata: + metadata = fs.database.enroll_files(group=group, model_id=t_model_id) + t_model = algorithm.enroll(t_enroll_features, metadata=metadata) + else: + t_model = algorithm.enroll(t_enroll_features) if t_model is None: if allow_missing_files: diff --git a/bob/bio/base/tools/extractor.py b/bob/bio/base/tools/extractor.py index 
281314677f7a5e8766fcbe8ee92986c91414de64..7f822c4918cf862360606e05cb1d63ad64f36423 100644 --- a/bob/bio/base/tools/extractor.py +++ b/bob/bio/base/tools/extractor.py @@ -2,6 +2,7 @@ import bob.io.base import os import logging +import inspect logger = logging.getLogger("bob.bio.base") from .FileSelector import FileSelector @@ -91,6 +92,11 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_ data_files = fs.preprocessed_data_list(groups=groups) feature_files = fs.feature_list(groups=groups) + if utils.is_argument_available("metadata", extractor.__call__): + metadata = fs.original_data_list(groups=groups) + else: + metadata = None + # select a subset of indices to iterate if indices is not None: index_range = range(indices[0], indices[1]) @@ -118,8 +124,12 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_ bob.io.base.create_directories_safe(os.path.dirname(feature_file)) # load data data = preprocessor.read_data(data_file) + # extract feature - feature = extractor(data) + if metadata is None: + feature = extractor(data) + else: + feature = extractor(data, metadata=metadata[i]) if feature is None: if allow_missing_files: diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py index 83eafab8953940e3a668c398e6b4e6166fa5cccf..93be41169d8c5ef32c9514095d224db511dbae15 100644 --- a/bob/bio/base/tools/preprocessor.py +++ b/bob/bio/base/tools/preprocessor.py @@ -2,6 +2,7 @@ import bob.io.base import os import logging +import inspect logger = logging.getLogger("bob.bio.base") from .FileSelector import FileSelector @@ -46,6 +47,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files original_directory, original_extension = fs.original_directory_and_extension() preprocessed_data_files = fs.preprocessed_data_list(groups=groups) + if utils.is_argument_available("metadata", preprocessor.__call__): + metadata = fs.original_data_list(groups=groups) + else: + 
metadata = None + # select a subset of keys to iterate if indices is not None: index_range = range(indices[0], indices[1]) @@ -58,7 +64,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files # read annotation files annotation_list = fs.annotation_list(groups=groups) - # iterate over the selected files + # iterate over the selected files for i in index_range: preprocessed_data_file = preprocessed_data_files[i] file_object = data_files[i] @@ -78,7 +84,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files annotations = fs.get_annotations(annotation_list[i]) # call the preprocessor - preprocessed_data = preprocessor(data, annotations) + if metadata is None: + preprocessed_data = preprocessor(data, annotations) + else: + preprocessed_data = preprocessor(data, annotations, metadata=metadata[i]) + if preprocessed_data is None: if allow_missing_files: logger.debug("... Processing original data file '%s' was not successful", file_name) @@ -90,8 +100,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files preprocessor.write_data(preprocessed_data, preprocessed_data_file) else: - logger.debug("... Skipping original data file '%s' since preprocessed data '%s' exists", file_name, preprocessed_data_file) - + logger.debug("... 
Skipping original data file '%s' since preprocessed data '%s' exists", file_name, preprocessed_data_file) def read_preprocessed_data(file_names, preprocessor, split_by_client = False, allow_missing_files = False): diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py index 1649add9826561f8c13a9336c9538154958a6773..18aed256117d7c06dcb5dab84b50c25c5305ded0 100644 --- a/bob/bio/base/tools/scoring.py +++ b/bob/bio/base/tools/scoring.py @@ -5,12 +5,12 @@ import bob.measure import numpy import os, sys import tarfile +import inspect import logging logger = logging.getLogger("bob.bio.base") from .FileSelector import FileSelector -from .extractor import read_features from .. import utils def _scores(algorithm, reader, model, probe_objects, allow_missing_files): @@ -27,13 +27,16 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files): # if we have no model, all scores are undefined return scores + # Checking if we need to ship the metadata in the scoring method + has_metadata = utils.is_argument_available("metadata", algorithm.score) + # Loops over the probe sets - for i, probe_element in enumerate(probes): + for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects): if fs.uses_probe_file_sets(): assert isinstance(probe_element, list) # filter missing files if allow_missing_files: - probe_element = utils.filter_missing_files(probe_element) + probe_element = utils.filter_missing_files(probe_element, probe_objects) if not probe_element: # we keep the NaN score continue @@ -47,8 +50,13 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files): continue # read probe probe = reader.read_feature(probe_element) + # compute score - scores[0,i] = algorithm.score(model, probe) + if has_metadata: + scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata) + else: + scores[0, i] = algorithm.score(model, probe) + # Returns the scores return scores diff --git 
a/bob/bio/base/utils/__init__.py b/bob/bio/base/utils/__init__.py index b65569cb5106e308123ab5c1a4022ee54a4fbc81..2cd7501a46599d3497387dc3c19ce2c32b3cf01b 100644 --- a/bob/bio/base/utils/__init__.py +++ b/bob/bio/base/utils/__init__.py @@ -7,7 +7,8 @@ from .resources import * from .io import * from .singleton import * from . import processors - +import six +import inspect import numpy def score_fusion_strategy(strategy_name = 'average'): @@ -53,6 +54,27 @@ def selected_elements(list_of_elements, desired_number_of_elements = None): # sub-select return [list_of_elements[i] for i in selected_indices(total_number_of_elements, desired_number_of_elements)] + def pretty_print(obj, kwargs): """Returns a pretty-print of the parameters to the constructor of a class, which should be able to copy-paste on the command line to create the object (with few exceptions).""" return "%s(%s)" % (str(obj.__class__), ", ".join(["%s='%s'" % (key,value) if isinstance(value, str) else "%s=%s" % (key, value) for key,value in kwargs.items() if value is not None])) + + +def is_argument_available(argument, method): + """ + Check if an argument (or keyword argument) is available in a method + + Attributes + ---------- + argument: str + The name of the argument (or keyword argument). + + method: + Pointer to the method + + """ + + if six.PY2: + return argument in inspect.getargspec(method).args + else: + return argument in inspect.signature(method).parameters.keys() diff --git a/doc/implementation.rst b/doc/implementation.rst index 55908a9a79f496e94710ea1f63e6ce84a7c843df..cccdb11822467bae66bff758dc41a18a2db25e44 100644 --- a/doc/implementation.rst +++ b/doc/implementation.rst @@ -45,6 +45,13 @@ All of them implement the following two functions: .. note:: When the database does not provide annotations, the ``annotations`` parameter might be ``None``. + .. note:: + If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the preprocessor. 
+ To receive it, the method ``__call__`` must declare a keyword argument named **metadata**. + If this keyword is present in the method signature, an instance of :py:class:`bob.bio.base.database.BioFile` + is passed in through this keyword argument. + + By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`. In that case, the base class IO functionality can be used. If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class: @@ -72,6 +79,13 @@ All extractor classes provide at least the functions: * ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data. By default, the returned feature should be a :py:class:`numpy.ndarray`. + .. note:: + If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the extractor. + To receive it, the method ``__call__`` must declare a keyword argument named **metadata**. + If this keyword is present in the method signature, an instance of :py:class:`bob.bio.base.database.BioFile` + is passed in through this keyword argument. + + If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden. In this case, also the function to read that kind of features needs to be overridden: @@ -180,6 +194,13 @@ These two functions are: * ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores. * ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior. + .. note:: + If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the algorithm. + To receive it, the methods ``train_projector``, ``project``, ``enroll`` and ``score`` may each declare a keyword argument named **metadata**. 
+ If this keyword is present in a method's signature, the matching :py:class:`bob.bio.base.database.BioFile` metadata + is passed in through this keyword argument (a sequence of them, rather than a single object, for ``train_projector`` and ``enroll``). + + Implemented Tools ----------------- diff --git a/setup.py b/setup.py index e93fd72f55d01a62b57200f3fb94bdc5b808e3b3..d33d06d3c71f8cf11d37a92bb4181b464c3adf73 100644 --- a/setup.py +++ b/setup.py @@ -95,15 +95,18 @@ setup( 'bob.bio.preprocessor': [ 'dummy = bob.bio.base.test.dummy.preprocessor:preprocessor', # for test purposes only 'filename = bob.bio.base.config.preprocessor.filename:preprocessor', + 'dummy_metadata = bob.bio.base.test.dummy.preprocessor:preprocessor_metadata', ], 'bob.bio.extractor': [ 'dummy = bob.bio.base.test.dummy.extractor:extractor', # for test purposes only + 'dummy_metadata = bob.bio.base.test.dummy.extractor:extractor_metadata', # for test purposes only 'linearize = bob.bio.base.config.extractor.linearize:extractor', ], 'bob.bio.algorithm': [ 'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only + 'dummy_metadata = bob.bio.base.test.dummy.algorithm:algorithm_metadata', 'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm', 'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm', 'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm',