Skip to content
Snippets Groups Projects
Commit a8746ea4 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Appended the metadata in the algorithm

[sphinx] Documented the metadata info

Fixed issue in the projector metadata
parent 54bf9007
No related branches found
No related tags found
1 merge request!125Included metadata during the feature extraction.
Pipeline #
import scipy.spatial
import bob.io.base
import numpy
from bob.bio.base.algorithm import Algorithm
_data = [5., 6., 7., 8., 9.]
......@@ -57,3 +57,28 @@ class DummyAlgorithm (Algorithm):
return scipy.spatial.distance.euclidean(model, probe)
algorithm = DummyAlgorithm()
class DummyAlgorithmMetadata (DummyAlgorithm):
  """Test-only variant of :py:class:`DummyAlgorithm` that requires metadata.

  Every overridden method declares a ``metadata`` keyword argument, checks
  that the caller actually supplied it, and then delegates to the parent
  implementation without forwarding the metadata.
  """

  def train_projector(self, train_files, projector_file, metadata=None):
    """Checks that ``metadata`` was supplied, then trains the projector exactly as the parent class does."""
    assert metadata is not None
    return super(DummyAlgorithmMetadata, self).train_projector(train_files, projector_file)

  def enroll(self, enroll_features, metadata=None):
    """Checks that ``metadata`` was supplied and refers to a single client, then enrolls as the parent class does."""
    # Same guard as the sibling methods; without it, a missing ``metadata``
    # would fail below with an opaque ``TypeError`` on ``metadata[0]``.
    assert metadata is not None
    # Checking that all the metadata are from the same client_id.
    # Builtin ``all`` is used because ``numpy.alltrue`` no longer exists in NumPy >= 2.0.
    assert all(metadata[0].client_id == m.client_id for m in metadata)
    return super(DummyAlgorithmMetadata, self).enroll(enroll_features)

  def score(self, model, probe, metadata=None):
    """Checks that ``metadata`` was supplied, then returns the parent's score (Euclidean distance between model and probe)."""
    assert metadata is not None
    return super(DummyAlgorithmMetadata, self).score(model, probe)

  def project(self, feature, metadata=None):
    """Checks that ``metadata`` was supplied, then projects the feature as the parent class does."""
    assert metadata is not None
    return super(DummyAlgorithmMetadata, self).project(feature)


# Instance exposed under its own resource name (see the ``dummy_metadata`` entry point)
algorithm_metadata = DummyAlgorithmMetadata()
......@@ -150,7 +150,7 @@ def test_verify_resources_metadata():
'-d', 'dummy',
'-p', 'dummy_metadata',
'-e', 'dummy_metadata',
'-a', 'dummy',
'-a', 'dummy_metadata',
'--zt-norm',
'--allow-missing-files',
'-vs', 'test_resource',
......
......@@ -2,6 +2,7 @@ import bob.io.base
import os
import logging
import inspect
logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector
......@@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
# train projector
logger.info("- Projection: loading training data")
train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client)
train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files)
if algorithm.split_training_features_by_client:
......@@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))
# perform training
algorithm.train_projector(train_features, fs.projector_file)
if "metadata" in inspect.getargspec(algorithm.train_projector).args:
metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client)
algorithm.train_projector(train_features, fs.projector_file, metadata=metadata)
else:
algorithm.train_projector(train_features, fs.projector_file)
def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False):
......@@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
feature_files = fs.feature_list(groups=groups)
projected_files = fs.projected_list(groups=groups)
metadata = fs.original_data_list(groups=groups)
# select a subset of indices to iterate
if indices is not None:
......@@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
bob.io.base.create_directories_safe(os.path.dirname(projected_file))
# load feature
feature = extractor.read_feature(feature_file)
# project feature
projected = algorithm.project(feature)
if "metadata" in inspect.getargspec(algorithm.project).args:
projected = algorithm.project(feature, metadata=metadata)
else:
projected = algorithm.project(feature)
if projected is None:
if allow_missing_files:
......@@ -264,8 +274,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# Removes old file if required
if not utils.check_file(model_file, force,
algorithm.min_model_file_size):
enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
if allow_missing_files:
enroll_files = utils.filter_missing_files(enroll_files)
if not enroll_files:
......@@ -280,7 +290,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# load all files into memory
enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files]
model = algorithm.enroll(enroll_features)
if "metadata" in inspect.getargspec(algorithm.enroll).args:
metadata = fs.database.enroll_files(group=group, model_id=model_id)
model = algorithm.enroll(enroll_features, metadata=metadata)
else:
model = algorithm.enroll(enroll_features)
if model is None:
if allow_missing_files:
......@@ -327,7 +341,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# load all files into memory
t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files]
t_model = algorithm.enroll(t_enroll_features)
if "metadata" in inspect.getargspec(algorithm.enroll).args:
metadata = fs.database.enroll_files(group=group, model_id=t_model_id)
t_model = algorithm.enroll(t_enroll_features, metadata=metadata)
else:
t_model = algorithm.enroll(t_enroll_features)
if t_model is None:
if allow_missing_files:
......
......@@ -5,12 +5,12 @@ import bob.measure
import numpy
import os, sys
import tarfile
import inspect
import logging
logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector
from .extractor import read_features
from .. import utils
def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
......@@ -28,12 +28,12 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
return scores
# Loops over the probe sets
for i, probe_element in enumerate(probes):
for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects):
if fs.uses_probe_file_sets():
assert isinstance(probe_element, list)
# filter missing files
if allow_missing_files:
probe_element = utils.filter_missing_files(probe_element)
probe_element = utils.filter_missing_files(probe_element, probe_objects)
if not probe_element:
# we keep the NaN score
continue
......@@ -48,7 +48,11 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
# read probe
probe = reader.read_feature(probe_element)
# compute score
scores[0,i] = algorithm.score(model, probe)
if "metadata" in inspect.getargspec(algorithm.score).args:
scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata)
else:
scores[0, i] = algorithm.score(model, probe)
# Returns the scores
return scores
......
......@@ -45,6 +45,13 @@ All of them implement the following two functions:
.. note::
   When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
.. note::
   If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the preprocessor.
   To receive it, declare a keyword argument named ``metadata`` in the signature of the ``__call__`` method.
   When this keyword argument is present in the signature, the :py:class:`bob.bio.base.database.BioFile`
   instance of the file being processed is passed through it.
By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
In that case, the base class IO functionality can be used.
If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
......@@ -72,6 +79,13 @@ All extractor classes provide at least the functions:
* ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
By default, the returned feature should be a :py:class:`numpy.ndarray`.
.. note::
   If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the extractor.
   To receive it, declare a keyword argument named ``metadata`` in the signature of the ``__call__`` method.
   When this keyword argument is present in the signature, the :py:class:`bob.bio.base.database.BioFile`
   instance of the file being processed is passed through it.
If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden.
In this case, also the function to read that kind of features needs to be overridden:
......@@ -180,6 +194,13 @@ These two functions are:
* ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores.
* ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior.
.. note::
   If necessary, the :py:class:`bob.bio.base.database.BioFile` information can be passed to the algorithm.
   To receive it, declare a keyword argument named ``metadata`` in the signature of ``train_projector``, ``project``, ``enroll`` and/or ``score``.
   Each method that declares this keyword argument receives the corresponding :py:class:`bob.bio.base.database.BioFile`
   object(s) through it; methods that do not declare it are called without it.
Implemented Tools
-----------------
......
......@@ -106,6 +106,7 @@ setup(
'bob.bio.algorithm': [
'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
'dummy_metadata = bob.bio.base.test.dummy.algorithm:algorithm_metadata',
'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm',
'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment