Commit dd5f62bb authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira

Merge branch 'meta-information' into 'master'

Included metadata during the feature extraction.

See merge request !125
parents 9d8ee935 1f408e55
Pipeline #17275 passed with stages in 20 minutes and 8 seconds
import scipy.spatial
import bob.io.base
import numpy
from bob.bio.base.algorithm import Algorithm
from bob.bio.base.database import BioFile
_data = [5., 6., 7., 8., 9.]
@@ -57,3 +58,28 @@ class DummyAlgorithm (Algorithm):
return scipy.spatial.distance.euclidean(model, probe)
algorithm = DummyAlgorithm()
class DummyAlgorithmMetadata (DummyAlgorithm):

  def train_projector(self, train_files, projector_file, metadata=None):
    """Checks that the metadata is a list, then defers to the base class, ignoring the metadata."""
    assert isinstance(metadata, list)
    return super(DummyAlgorithmMetadata, self).train_projector(train_files, projector_file)

  def enroll(self, enroll_features, metadata=None):
    # Checking that all the metadata entries belong to the same client_id
    assert numpy.alltrue([metadata[0].client_id == m.client_id for m in metadata])
    return super(DummyAlgorithmMetadata, self).enroll(enroll_features)

  def score(self, model, probe, metadata=None):
    """Returns the Euclidean distance between model and probe."""
    assert isinstance(metadata, BioFile)
    return super(DummyAlgorithmMetadata, self).score(model, probe)

  def project(self, feature, metadata=None):
    assert isinstance(metadata, BioFile)
    return super(DummyAlgorithmMetadata, self).project(feature)

algorithm_metadata = DummyAlgorithmMetadata()
import numpy
import bob.bio.base
from bob.bio.base.database import BioFile
from bob.bio.base.extractor import Extractor
_data = [0., 1., 2., 3., 4.]
@@ -25,3 +25,13 @@ class DummyExtractor (Extractor):
return data.astype(numpy.float).flatten()
extractor = DummyExtractor()
class DummyExtractorMetadata (DummyExtractor):

  def __call__(self, data, metadata=None):
    """Checks that the metadata is a BioFile, then converts the data type as the base class does, ignoring any annotation."""
    assert isinstance(metadata, BioFile)
    return super(DummyExtractorMetadata, self).__call__(data)

extractor_metadata = DummyExtractorMetadata()
from bob.bio.base.preprocessor import Preprocessor
from bob.bio.base.database import BioFile
import numpy
numpy.random.seed(10)
@@ -16,5 +17,14 @@ class DummyPreprocessor (Preprocessor):
return data
preprocessor = DummyPreprocessor()
class DummyPreprocessorMetadata (DummyPreprocessor):

  def __call__(self, data, annotation, metadata=None):
    """Checks that the metadata is a BioFile, then processes the data as the base class does, ignoring any annotation."""
    assert isinstance(metadata, BioFile)
    return super(DummyPreprocessorMetadata, self).__call__(data, annotation)

preprocessor_metadata = DummyPreprocessorMetadata()
@@ -143,6 +143,25 @@ def test_verify_resources():
_verify(parameters, test_dir, 'test_resource')
def test_verify_resources_metadata():
  test_dir = tempfile.mkdtemp(prefix='bobtest_')
  # define dummy parameters
  parameters = [
      '-d', 'dummy',
      '-p', 'dummy_metadata',
      '-e', 'dummy_metadata',
      '-a', 'dummy_metadata',
      '--zt-norm',
      '--allow-missing-files',
      '-vs', 'test_resource',
      '--temp-directory', test_dir,
      '--result-directory', test_dir,
      '--preferred-package', 'bob.bio.base'
  ]

  _verify(parameters, test_dir, 'test_resource')
def test_verify_commandline():
  test_dir = tempfile.mkdtemp(prefix='bobtest_')
  # define dummy parameters
......
@@ -2,6 +2,7 @@ import bob.io.base
import os
import logging
import inspect
logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector
@@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
# train projector
logger.info("- Projection: loading training data")
train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client)
train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files)
if algorithm.split_training_features_by_client:
@@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))
# perform training
if utils.is_argument_available("metadata", algorithm.train_projector):
  metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client)
  algorithm.train_projector(train_features, fs.projector_file, metadata=metadata)
else:
  algorithm.train_projector(train_features, fs.projector_file)
def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False):
@@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
feature_files = fs.feature_list(groups=groups)
projected_files = fs.projected_list(groups=groups)
metadata = fs.original_data_list(groups=groups)
# select a subset of indices to iterate
if indices is not None:
@@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
bob.io.base.create_directories_safe(os.path.dirname(projected_file))
# load feature
feature = extractor.read_feature(feature_file)
# project feature
if utils.is_argument_available("metadata", algorithm.project):
  projected = algorithm.project(feature, metadata=metadata[i])
else:
  projected = algorithm.project(feature)
if projected is None:
if allow_missing_files:
@@ -247,6 +257,9 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# which tool to use to read the features...
reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor
# Checking if we need to ship the metadata to the enroll method
has_metadata = utils.is_argument_available("metadata", algorithm.enroll)
# Create Models
if 'N' in types:
for group in groups:
@@ -264,8 +277,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# Removes old file if required
if not utils.check_file(model_file, force,
algorithm.min_model_file_size):
enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
if allow_missing_files:
enroll_files = utils.filter_missing_files(enroll_files)
if not enroll_files:
@@ -280,7 +293,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# load all files into memory
enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files]
if has_metadata:
  metadata = fs.database.enroll_files(group=group, model_id=model_id)
  model = algorithm.enroll(enroll_features, metadata=metadata)
else:
  model = algorithm.enroll(enroll_features)
if model is None:
if allow_missing_files:
@@ -327,7 +344,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
# load all files into memory
t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files]
if has_metadata:
  metadata = fs.database.enroll_files(group=group, model_id=t_model_id)
  t_model = algorithm.enroll(t_enroll_features, metadata=metadata)
else:
  t_model = algorithm.enroll(t_enroll_features)
if t_model is None:
if allow_missing_files:
......
@@ -2,6 +2,7 @@ import bob.io.base
import os
import logging
import inspect
logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector
@@ -91,6 +92,11 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_
data_files = fs.preprocessed_data_list(groups=groups)
feature_files = fs.feature_list(groups=groups)
if utils.is_argument_available("metadata", extractor.__call__):
metadata = fs.original_data_list(groups=groups)
else:
metadata = None
# select a subset of indices to iterate
if indices is not None:
index_range = range(indices[0], indices[1])
@@ -118,8 +124,12 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_
bob.io.base.create_directories_safe(os.path.dirname(feature_file))
# load data
data = preprocessor.read_data(data_file)
# extract feature
if metadata is None:
  feature = extractor(data)
else:
  feature = extractor(data, metadata=metadata[i])
if feature is None:
if allow_missing_files:
......
@@ -2,6 +2,7 @@ import bob.io.base
import os
import logging
import inspect
logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector
@@ -46,6 +47,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
original_directory, original_extension = fs.original_directory_and_extension()
preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
if utils.is_argument_available("metadata", preprocessor.__call__):
metadata = fs.original_data_list(groups=groups)
else:
metadata = None
# select a subset of keys to iterate
if indices is not None:
index_range = range(indices[0], indices[1])
@@ -58,7 +64,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
# read annotation files
annotation_list = fs.annotation_list(groups=groups)
# iterate over the selected files
for i in index_range:
preprocessed_data_file = preprocessed_data_files[i]
file_object = data_files[i]
@@ -78,7 +84,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
annotations = fs.get_annotations(annotation_list[i])
# call the preprocessor
if metadata is None:
  preprocessed_data = preprocessor(data, annotations)
else:
  preprocessed_data = preprocessor(data, annotations, metadata=metadata[i])
if preprocessed_data is None:
if allow_missing_files:
logger.debug("... Processing original data file '%s' was not successful", file_name)
@@ -90,8 +100,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
preprocessor.write_data(preprocessed_data, preprocessed_data_file)
else:
logger.debug("... Skipping original data file '%s' since preprocessed data '%s' exists", file_name, preprocessed_data_file)
def read_preprocessed_data(file_names, preprocessor, split_by_client = False, allow_missing_files = False):
......
@@ -5,12 +5,12 @@ import bob.measure
import numpy
import os, sys
import tarfile
import inspect
import logging
logger = logging.getLogger("bob.bio.base")
from .FileSelector import FileSelector
from .extractor import read_features
from .. import utils
def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
@@ -27,13 +27,16 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
# if we have no model, all scores are undefined
return scores
# Checking if we need to ship the metadata to the scoring method
has_metadata = utils.is_argument_available("metadata", algorithm.score)
# Loops over the probe sets
for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects):
if fs.uses_probe_file_sets():
assert isinstance(probe_element, list)
# filter missing files
if allow_missing_files:
  probe_element = utils.filter_missing_files(probe_element, probe_objects)
  if not probe_element:
    # we keep the NaN score
    continue
@@ -47,8 +50,13 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
continue
# read probe
probe = reader.read_feature(probe_element)
# compute score
if has_metadata:
  scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata)
else:
  scores[0, i] = algorithm.score(model, probe)
# Returns the scores
return scores
......
@@ -7,7 +7,8 @@ from .resources import *
from .io import *
from .singleton import *
from . import processors
import six
import inspect
import numpy
def score_fusion_strategy(strategy_name = 'average'):
@@ -53,6 +54,27 @@ def selected_elements(list_of_elements, desired_number_of_elements = None):
# sub-select
return [list_of_elements[i] for i in selected_indices(total_number_of_elements, desired_number_of_elements)]
def pretty_print(obj, kwargs):
  """Returns a pretty-printed version of the constructor parameters of a class, which can usually be copy-pasted on the command line to re-create the object (with few exceptions)."""
  return "%s(%s)" % (str(obj.__class__), ", ".join(
      "%s='%s'" % (key, value) if isinstance(value, str) else "%s=%s" % (key, value)
      for key, value in kwargs.items() if value is not None))
def is_argument_available(argument, method):
  """Checks whether an argument (or keyword argument) is available in a method.

  Parameters
  ----------
  argument: str
    The name of the argument (or keyword argument).
  method:
    The method (or function) to inspect.
  """
  if six.PY2:
    return argument in inspect.getargspec(method).args
  else:
    return argument in inspect.signature(method).parameters
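# A rough usage sketch of the helper above (the two classes are hypothetical,
# for illustration only): the tool chains call it to decide whether a
# user-provided method accepts the new ``metadata`` keyword.
class _WithMetadata(object):
  def score(self, model, probe, metadata=None):
    return 0.0

class _WithoutMetadata(object):
  def score(self, model, probe):
    return 0.0

assert is_argument_available("metadata", _WithMetadata().score)
assert not is_argument_available("metadata", _WithoutMetadata().score)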
@@ -45,6 +45,13 @@ All of them implement the following two functions:
.. note::
   When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
.. note::
   If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the preprocessor.
   To enable this, add a keyword argument called ``metadata`` to the signature of ``__call__``;
   whenever this keyword is present in the signature, an instance of
   :py:class:`bob.bio.base.database.BioFile` is shipped via it.
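A minimal sketch of such a metadata-aware preprocessor (the class below is illustrative only, not part of this package)::

   from bob.bio.base.preprocessor import Preprocessor

   class MetadataAwarePreprocessor(Preprocessor):

     def __call__(self, data, annotations, metadata=None):
       # ``metadata`` is the BioFile of the current sample (or None);
       # e.g., ``metadata.client_id`` could select client-specific processing
       if metadata is not None:
         print("Preprocessing a sample of client %s" % metadata.client_id)
       return data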
By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
In that case, the base class IO functionality can be used.
If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it must override further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
@@ -72,6 +79,13 @@ All extractor classes provide at least the functions:
* ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
By default, the returned feature should be a :py:class:`numpy.ndarray`.
.. note::
   If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the extractor.
   To enable this, add a keyword argument called ``metadata`` to the signature of ``__call__``;
   whenever this keyword is present in the signature, an instance of
   :py:class:`bob.bio.base.database.BioFile` is shipped via it.
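A minimal sketch of a metadata-aware extractor (illustrative only, assuming the same keyword mechanism)::

   import numpy
   from bob.bio.base.extractor import Extractor

   class MetadataAwareExtractor(Extractor):

     def __call__(self, data, metadata=None):
       # ``metadata`` is the BioFile of the current sample (or None)
       if metadata is not None:
         print("Extracting features for file %s" % metadata.path)
       return numpy.asarray(data, dtype=numpy.float64).flatten()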
If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function must be overridden.
In this case, the function to read that kind of features needs to be overridden as well:
@@ -180,6 +194,13 @@ These two functions are:
* ``score_for_multiple_models(self, models, probe)``: In case your model stores several features, **call** this function to compute the average (or min, max, ...) of the scores.
* ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes is computed. **Override** this function in case you want different behavior.
.. note::
   If necessary, instances of :py:class:`bob.bio.base.database.BioFile` can be passed to the algorithm.
   To enable this, add a keyword argument called ``metadata`` to the signature of ``train_projector``, ``project``, ``enroll`` or ``score``;
   whenever this keyword is present, the matching metadata is shipped via it: a single
   :py:class:`bob.bio.base.database.BioFile` for ``project`` and ``score``, and a list of them
   for ``train_projector`` and ``enroll``.
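A minimal sketch of a metadata-aware algorithm (illustrative only; it mirrors the keyword convention above)::

   import numpy
   import scipy.spatial
   from bob.bio.base.algorithm import Algorithm

   class MetadataAwareAlgorithm(Algorithm):

     def enroll(self, enroll_features, metadata=None):
       # ``metadata`` is a list of BioFile objects, one per enrollment feature
       return numpy.mean(enroll_features, axis=0)

     def score(self, model, probe, metadata=None):
       # ``metadata`` is the BioFile of the probe sample
       return scipy.spatial.distance.euclidean(model, probe)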
Implemented Tools
-----------------
......
@@ -95,15 +95,18 @@ setup(
'bob.bio.preprocessor': [
'dummy = bob.bio.base.test.dummy.preprocessor:preprocessor', # for test purposes only
'filename = bob.bio.base.config.preprocessor.filename:preprocessor',
'dummy_metadata = bob.bio.base.test.dummy.preprocessor:preprocessor_metadata',
],
'bob.bio.extractor': [
'dummy = bob.bio.base.test.dummy.extractor:extractor', # for test purposes only
'dummy_metadata = bob.bio.base.test.dummy.extractor:extractor_metadata', # for test purposes only
'linearize = bob.bio.base.config.extractor.linearize:extractor',
],
'bob.bio.algorithm': [
'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
'dummy_metadata = bob.bio.base.test.dummy.algorithm:algorithm_metadata',
'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm',
'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm',
......