Skip to content
Snippets Groups Projects
Commit a8746ea4 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Appended the metadata in the algorithm

[sphinx] Documented the metadata info

Fixed issue in the projector metadata
parent 54bf9007
Branches
No related tags found
1 merge request!125Included metadata during the feature extraction.
Pipeline #
import scipy.spatial import scipy.spatial
import bob.io.base import bob.io.base
import numpy
from bob.bio.base.algorithm import Algorithm from bob.bio.base.algorithm import Algorithm
_data = [5., 6., 7., 8., 9.] _data = [5., 6., 7., 8., 9.]
...@@ -57,3 +57,28 @@ class DummyAlgorithm (Algorithm): ...@@ -57,3 +57,28 @@ class DummyAlgorithm (Algorithm):
return scipy.spatial.distance.euclidean(model, probe) return scipy.spatial.distance.euclidean(model, probe)
algorithm = DummyAlgorithm() algorithm = DummyAlgorithm()
class DummyAlgorithmMetadata (DummyAlgorithm):
    """Test-only variant of ``DummyAlgorithm`` whose methods accept ``metadata``.

    Each method asserts that the ``metadata`` keyword argument was actually
    supplied by the caller and then delegates to the ``DummyAlgorithm``
    implementation, which is called without the metadata.
    """

    def train_projector(self, train_files, projector_file, metadata=None):
        """Checks that ``metadata`` was given, then delegates to the parent's ``train_projector``."""
        assert metadata is not None
        return super(DummyAlgorithmMetadata, self).train_projector(train_files, projector_file)

    def enroll(self, enroll_features, metadata=None):
        """Checks ``metadata``, then delegates to the parent's ``enroll``.

        ``metadata`` is iterated and indexed, and every element must expose a
        ``client_id`` attribute equal to that of the first element.
        """
        # Fail with the same explicit assertion as the sibling methods instead
        # of an opaque TypeError raised by ``metadata[0]`` when it is None.
        assert metadata is not None
        # Checking that all the metadata belong to the same client_id.
        # The builtin ``all`` replaces ``numpy.alltrue``, which is deprecated
        # and no longer available in NumPy 2.0.
        assert all(m.client_id == metadata[0].client_id for m in metadata)
        return super(DummyAlgorithmMetadata, self).enroll(enroll_features)

    def score(self, model, probe, metadata=None):
        """Checks that ``metadata`` was given, then returns the parent's score for model and probe."""
        assert metadata is not None
        return super(DummyAlgorithmMetadata, self).score(model, probe)

    def project(self, feature, metadata=None):
        """Checks that ``metadata`` was given, then delegates to the parent's ``project``."""
        assert metadata is not None
        return super(DummyAlgorithmMetadata, self).project(feature)


algorithm_metadata = DummyAlgorithmMetadata()
...@@ -150,7 +150,7 @@ def test_verify_resources_metadata(): ...@@ -150,7 +150,7 @@ def test_verify_resources_metadata():
'-d', 'dummy', '-d', 'dummy',
'-p', 'dummy_metadata', '-p', 'dummy_metadata',
'-e', 'dummy_metadata', '-e', 'dummy_metadata',
'-a', 'dummy', '-a', 'dummy_metadata',
'--zt-norm', '--zt-norm',
'--allow-missing-files', '--allow-missing-files',
'-vs', 'test_resource', '-vs', 'test_resource',
......
...@@ -2,6 +2,7 @@ import bob.io.base ...@@ -2,6 +2,7 @@ import bob.io.base
import os import os
import logging import logging
import inspect
logger = logging.getLogger("bob.bio.base") logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector from .FileSelector import FileSelector
...@@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F ...@@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file)) bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
# train projector # train projector
logger.info("- Projection: loading training data") logger.info("- Projection: loading training data")
train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client) train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client)
train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files) train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files)
if algorithm.split_training_features_by_client: if algorithm.split_training_features_by_client:
...@@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F ...@@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files)) logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))
# perform training # perform training
algorithm.train_projector(train_features, fs.projector_file) if "metadata" in inspect.getargspec(algorithm.train_projector).args:
metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client)
algorithm.train_projector(train_features, fs.projector_file, metadata=metadata)
else:
algorithm.train_projector(train_features, fs.projector_file)
def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False): def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False):
...@@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f ...@@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
feature_files = fs.feature_list(groups=groups) feature_files = fs.feature_list(groups=groups)
projected_files = fs.projected_list(groups=groups) projected_files = fs.projected_list(groups=groups)
metadata = fs.original_data_list(groups=groups)
# select a subset of indices to iterate # select a subset of indices to iterate
if indices is not None: if indices is not None:
...@@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f ...@@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
bob.io.base.create_directories_safe(os.path.dirname(projected_file)) bob.io.base.create_directories_safe(os.path.dirname(projected_file))
# load feature # load feature
feature = extractor.read_feature(feature_file) feature = extractor.read_feature(feature_file)
# project feature # project feature
projected = algorithm.project(feature) if "metadata" in inspect.getargspec(algorithm.project).args:
projected = algorithm.project(feature, metadata=metadata)
else:
projected = algorithm.project(feature)
if projected is None: if projected is None:
if allow_missing_files: if allow_missing_files:
...@@ -264,8 +274,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev ...@@ -264,8 +274,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# Removes old file if required # Removes old file if required
if not utils.check_file(model_file, force, if not utils.check_file(model_file, force,
algorithm.min_model_file_size): algorithm.min_model_file_size):
enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
if allow_missing_files: if allow_missing_files:
enroll_files = utils.filter_missing_files(enroll_files) enroll_files = utils.filter_missing_files(enroll_files)
if not enroll_files: if not enroll_files:
...@@ -280,7 +290,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev ...@@ -280,7 +290,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# load all files into memory # load all files into memory
enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files] enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files]
model = algorithm.enroll(enroll_features) if "metadata" in inspect.getargspec(algorithm.enroll).args:
metadata = fs.database.enroll_files(group=group, model_id=model_id)
model = algorithm.enroll(enroll_features, metadata=metadata)
else:
model = algorithm.enroll(enroll_features)
if model is None: if model is None:
if allow_missing_files: if allow_missing_files:
...@@ -327,7 +341,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev ...@@ -327,7 +341,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# load all files into memory # load all files into memory
t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files] t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files]
t_model = algorithm.enroll(t_enroll_features) if "metadata" in inspect.getargspec(algorithm.enroll).args:
metadata = fs.database.enroll_files(group=group, model_id=t_model_id)
t_model = algorithm.enroll(t_enroll_features, metadata=metadata)
else:
t_model = algorithm.enroll(t_enroll_features)
if t_model is None: if t_model is None:
if allow_missing_files: if allow_missing_files:
......
...@@ -5,12 +5,12 @@ import bob.measure ...@@ -5,12 +5,12 @@ import bob.measure
import numpy import numpy
import os, sys import os, sys
import tarfile import tarfile
import inspect
import logging import logging
logger = logging.getLogger("bob.bio.base") logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector from .FileSelector import FileSelector
from .extractor import read_features
from .. import utils from .. import utils
def _scores(algorithm, reader, model, probe_objects, allow_missing_files): def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
...@@ -28,12 +28,12 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files): ...@@ -28,12 +28,12 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
return scores return scores
# Loops over the probe sets # Loops over the probe sets
for i, probe_element in enumerate(probes): for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects):
if fs.uses_probe_file_sets(): if fs.uses_probe_file_sets():
assert isinstance(probe_element, list) assert isinstance(probe_element, list)
# filter missing files # filter missing files
if allow_missing_files: if allow_missing_files:
probe_element = utils.filter_missing_files(probe_element) probe_element = utils.filter_missing_files(probe_element, probe_objects)
if not probe_element: if not probe_element:
# we keep the NaN score # we keep the NaN score
continue continue
...@@ -48,7 +48,11 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files): ...@@ -48,7 +48,11 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
# read probe # read probe
probe = reader.read_feature(probe_element) probe = reader.read_feature(probe_element)
# compute score # compute score
scores[0,i] = algorithm.score(model, probe) if "metadata" in inspect.getargspec(algorithm.score).args:
scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata)
else:
scores[0, i] = algorithm.score(model, probe)
# Returns the scores # Returns the scores
return scores return scores
......
...@@ -45,6 +45,13 @@ All of them implement the following two functions: ...@@ -45,6 +45,13 @@ All of them implement the following two functions:
.. note:: .. note::
When the database does not provide annotations, the ``annotations`` parameter might be ``None``. When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
.. note::
If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the preprocessor.
To receive it, declare a keyword argument named **metadata** in the signature of the ``__call__`` method.
When this keyword is present in the signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
instance is passed through this keyword argument.
By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`. By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
In that case, the base class IO functionality can be used. In that case, the base class IO functionality can be used.
If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class: If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
...@@ -72,6 +79,13 @@ All extractor classes provide at least the functions: ...@@ -72,6 +79,13 @@ All extractor classes provide at least the functions:
* ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data. * ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
By default, the returned feature should be a :py:class:`numpy.ndarray`. By default, the returned feature should be a :py:class:`numpy.ndarray`.
.. note::
If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the extractor.
To receive it, declare a keyword argument named **metadata** in the signature of the ``__call__`` method.
When this keyword is present in the signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
instance is passed through this keyword argument.
If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden. If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden.
In this case, also the function to read that kind of features needs to be overridden: In this case, also the function to read that kind of features needs to be overridden:
...@@ -180,6 +194,13 @@ These two functions are: ...@@ -180,6 +194,13 @@ These two functions are:
* ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores. * ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores.
* ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior. * ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior.
.. note::
If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the algorithm.
To receive it, declare a keyword argument named **metadata** in the signature of ``train_projector``, ``project``, ``enroll`` or ``score``.
Each method that declares this keyword is called with the corresponding :py:class:`bob.bio.base.database.BioFile`
instance (or a list of instances, as for ``enroll``) through this keyword argument.
Implemented Tools Implemented Tools
----------------- -----------------
......
...@@ -106,6 +106,7 @@ setup( ...@@ -106,6 +106,7 @@ setup(
'bob.bio.algorithm': [ 'bob.bio.algorithm': [
'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only 'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
'dummy_metadata = bob.bio.base.test.dummy.algorithm:algorithm_metadata',
'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm', 'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm', 'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm',
'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm', 'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment