diff --git a/bob/pad/base/database/database.py b/bob/pad/base/database/database.py
index 5be494f12bd48755c3c13aa5d028794528fe93b5..ae319880bb1791b3f74030782538430402a61407 100644
--- a/bob/pad/base/database/database.py
+++ b/bob/pad/base/database/database.py
@@ -85,12 +85,24 @@ class PadDatabase(BioDatabase):
         """
         return []
 
 
+    @abc.abstractmethod
     def annotations(self, file):
         """
-        Annotations are not supported by PAD interface
+        Returns the annotations for the given File object, if available.
+        You need to override this method in your high-level implementation.
+        If your database does not have annotations, it should return ``None``.
+
+        **Parameters:**
+
+        file : :py:class:`bob.pad.base.database.PadFile`
+            The file for which annotations should be returned.
+        **Returns:**
+
+        annots : dict or None
+            The annotations for the file, if available.
         """
-        return None
+        raise NotImplementedError("This function must be implemented in your derived class.")
 
     @abc.abstractmethod
     def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
diff --git a/bob/pad/base/test/dummy/database.py b/bob/pad/base/test/dummy/database.py
index 19b3ec7b7c995c5d9dc0513695031ddb31b636e1..a48c301c525bcccc2491aee7776395476742aec1 100644
--- a/bob/pad/base/test/dummy/database.py
+++ b/bob/pad/base/test/dummy/database.py
@@ -190,5 +190,8 @@ class TestDatabase(PadDatabase):
             fileset += [TestFile(dummy_test_list[1], 6)]
         return fileset
 
+    def annotations(self, file):
+        return None
+
 
 database = TestDatabase(original_directory=data_dir, original_extension='')
diff --git a/bob/pad/base/test/dummy/database_sql.py b/bob/pad/base/test/dummy/database_sql.py
index 969e73313e347c3b19f39aaa5c17ef36b942c532..0d9e10105cc34f8375f460fadfd64818a84ae032 100644
--- a/bob/pad/base/test/dummy/database_sql.py
+++ b/bob/pad/base/test/dummy/database_sql.py
@@ -56,4 +56,7 @@ class TestDatabaseSql (bob.pad.base.database.PadDatabase, bob.db.base.SQLiteData
     def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
         return list(self.query(TestFileSql))
 
+    def annotations(self, file):
+        return None
+
 database = TestDatabaseSql()
\ No newline at end of file
diff --git a/bob/pad/base/test/dummy/extractor.py b/bob/pad/base/test/dummy/extractor.py
index 8e65a74bfc27ad9bb5b87df83b7fc4dcc329540c..39959a5ad2663c5cb8b778fdf276824bb10d0892 100644
--- a/bob/pad/base/test/dummy/extractor.py
+++ b/bob/pad/base/test/dummy/extractor.py
@@ -18,4 +18,7 @@ class DummyExtractor(Extractor):
         assert (data in _data)
         return data + 1.0
 
+    def train(self, training_data, extractor_file):
+        pass
+
 extractor = DummyExtractor()
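
Since ``annotations`` is now abstract, every high-level database implementation has to provide it. For illustration, a derived class could look like the following minimal sketch (the class name and the returned landmark keys are hypothetical; a database without annotations would simply return ``None``):

    from bob.pad.base.database import PadDatabase

    class MyPadDatabase(PadDatabase):

        def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
            return []  # query the file lists of your protocol here

        def annotations(self, file):
            # look up the annotations of the given PadFile; the keys below
            # only illustrate what such a dictionary might contain
            return {'reye': (110, 220), 'leye': (110, 140)}
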
diff --git a/bob/pad/base/tools/FileSelector.py b/bob/pad/base/tools/FileSelector.py
index 79401f65d12140875cb67cf273310bed18239a6b..d011e78583044d28d3739e1e3f4b4675b7a2979e 100644
--- a/bob/pad/base/tools/FileSelector.py
+++ b/bob/pad/base/tools/FileSelector.py
@@ -99,17 +99,22 @@ class FileSelector(object):
         return [realpaths, attackpaths]
 
     # List of files that will be used for all files
-    def original_data_list(self, groups=None):
-        """Returns the the joint list of original (real and attack) file names."""
-        return self.database.original_file_names(self.database.all_files(groups=groups))
-
-    def original_data_list_files(self, groups=None):
-        """Returns the joint list of original (real and attack) data files that can be used for preprocessing."""
+    def original_data_list(self, groups=None):
+        """Returns the list of original ``PadFile`` objects that can be used for preprocessing."""
         files = self.database.all_files(groups=groups)
         if len(files) != 2:
             fileset = files
         else:
             fileset = files[0]+files[1]
+        return fileset
+
+    def original_directory_and_extension(self):
+        """Returns the directory and extension of the original files."""
+        return self.database.original_directory, self.database.original_extension
+
+    def original_data_list_files(self, groups=None):
+        """Returns the joint list of original (real and attack) data files that can be used for preprocessing."""
+        fileset = self.original_data_list(groups=groups)
         return fileset, self.database.original_directory, self.database.original_extension
 
     def preprocessed_data_list(self, groups=None):
@@ -125,12 +130,15 @@
         return self.get_paths(self.database.all_files(groups=groups), "projected")
 
     # Training lists
-    def training_list(self, directory_type, step):
-        """Returns the tuple of lists (real, attacks) of features that should be used for projector training.
-        The directory_type might be any of 'preprocessed', 'extracted', or 'projected'.
-        The step might by any of 'train_extractor', 'train_projector', or 'train_enroller'.
+    def training_list(self, directory_type, step, combined=False):
+        """
+        Returns the training features: a list of two lists (real, attack),
+        or a single combined list of all real and attack features if
+        ``combined`` is ``True``. The directory_type might be any of
+        'preprocessed', 'extracted', or 'projected'. The step might be any
+        of 'train_extractor', 'train_projector', or 'train_enroller'.
         """
-        return self.get_paths(self.database.training_files(step), directory_type, False)
+        return self.get_paths(self.database.training_files(step), directory_type, combined)
 
     def toscore_objects(self, group):
         """Returns the File objects used to compute the raw scores."""
@@ -147,3 +155,14 @@
         no_norm_dir = self.score_directories[0]
         return os.path.join(no_norm_dir, "scores-" + group + "-" + obj_type) + self.compressed_extension
 
+    def annotation_list(self, groups=None):
+        """Returns the list of file objects for which annotations can be retrieved."""
+        files = self.database.all_files(groups=groups)
+        if len(files) != 2:
+            return files
+        else:
+            return files[0] + files[1]
+
+    def get_annotations(self, annotation_file):
+        """Returns the annotations of the given file."""
+        return self.database.annotations(annotation_file)
diff --git a/bob/pad/base/tools/algorithm.py b/bob/pad/base/tools/algorithm.py
index 0bd4cc3c17df51b98682e5b98ecde1115936e3ab..6a52b9ba51b9ab202034d004aea6bd164050f59c 100644
--- a/bob/pad/base/tools/algorithm.py
+++ b/bob/pad/base/tools/algorithm.py
@@ -52,7 +52,7 @@ def train_projector(algorithm, extractor, allow_missing_files=False, force=False
     # train projector
     logger.info("- Projection: loading training data")
     train_files = fs.training_list('extracted', 'train_projector')
-    train_features = read_features(train_files, extractor)
+    train_features = read_features(train_files, extractor, True, allow_missing_files)
 
     logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file,
                 len(train_files))
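
The new ``combined`` flag of ``training_list`` only changes the shape of the returned list of paths. A usage sketch, assuming a fully configured ``FileSelector`` instance:

    fs = FileSelector.instance()

    # default: one list of paths per class, i.e. [real_paths, attack_paths]
    real_paths, attack_paths = fs.training_list('extracted', 'train_projector')

    # combined=True: a single flat list of real and attack paths
    all_paths = fs.training_list('extracted', 'train_projector', combined=True)

This matches the call in ``train_projector`` above, where the split (per-class) lists are then read with ``read_features(train_files, extractor, True, allow_missing_files)``.
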
diff --git a/bob/pad/base/tools/command_line.py b/bob/pad/base/tools/command_line.py
index 8146a5827629c1bba46a342461df6236064b42ed..906fc7c5ac4e563910b91c6787209b3488613ac9 100644
--- a/bob/pad/base/tools/command_line.py
+++ b/bob/pad/base/tools/command_line.py
@@ -213,8 +213,10 @@ def initialize(parsers, command_line_parameters=None, skips=[]):
     args : namespace
         A namespace of arguments as read from the command line.
 
-    .. note:: The database, preprocessor, extractor, algorithm and grid (if specified) are actual
-        instances of the according classes.
+    .. note::
+
+        The database, preprocessor, extractor, algorithm and grid (if specified) are actual
+        instances of the corresponding classes.
     """
 
     # execute-only
""" @@ -97,7 +108,7 @@ def extract(extractor, preprocessor, groups=None, indices=None, allow_missing_fi feature_files = fs.feature_list(groups=groups) # select a subset of indices to iterate - if indices != None: + if indices is not None: index_range = range(indices[0], indices[1]) logger.info("- Extraction: splitting of index range %s" % str(indices)) else: @@ -106,44 +117,42 @@ def extract(extractor, preprocessor, groups=None, indices=None, allow_missing_fi logger.info("- Extraction: extracting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['preprocessed'], fs.directories['extracted']) for i in index_range: - data_file = str(data_files[i]) - feature_file = str(feature_files[i]) - - if not utils.check_file(feature_file, force, 1000): + data_file = data_files[i] + feature_file = feature_files[i] + + if not os.path.exists(data_file) and preprocessor.writes_data: + if allow_missing_files: + logger.debug( + "... Cannot find preprocessed data file %s; skipping", data_file) + continue + else: + logger.error( + "Cannot find preprocessed data file %s", data_file) + + if not utils.check_file(feature_file, force, + extractor.min_feature_file_size): + logger.debug( + "... Extracting features for data file '%s'", data_file) + # create output directory before reading the data file (is + # sometimes required, when relative directories are specified, + # especially, including a .. somewhere) + bob.io.base.create_directories_safe(os.path.dirname(feature_file)) # load data data = preprocessor.read_data(data_file) # extract feature - try: - logger.info("- Extraction: extracting from file: %s", data_file) - feature = extractor(data) - except ValueError: - logger.warn("WARNING: empty data in file %s", data_file) - feature = 0 - # write feature - if feature is not None: - bob.io.base.create_directories_safe(os.path.dirname(feature_file)) - extractor.write_feature(feature, feature_file) - - -def read_features(file_names, extractor): - """read_features(file_names, extractor) -> extracted + feature = extractor(data) - Reads the extracted features from ``file_names`` using the given ``extractor``. + if feature is None: + if allow_missing_files: + logger.debug( + "... Feature extraction for data file %s failed; skipping", data_file) + continue + else: + raise RuntimeError( + "Feature extraction of file '%s' was not successful", data_file) - **Parameters:** - - file_names : [[str], [str]] - A list of lists of file names (real, attack) to be read. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for reading the extracted features. - - **Returns:** - - extracted : [object] or [[object]] - The list of extracted features, in the same order as in the ``file_names``. - """ - real_files = file_names[0] - attack_files = file_names[1] - return [[extractor.read_feature(str(f)) for f in real_files], - [extractor.read_feature(str(f)) for f in attack_files]] + # write feature + extractor.write_feature(feature, feature_file) + else: + logger.debug( + "... 
diff --git a/bob/pad/base/tools/preprocessor.py b/bob/pad/base/tools/preprocessor.py
index 9ce6ee6630538f007f9e51b5a29d011495777c43..0a4784f252e1020985a59b5eed18cdaaa370435f 100644
--- a/bob/pad/base/tools/preprocessor.py
+++ b/bob/pad/base/tools/preprocessor.py
@@ -11,10 +11,11 @@ import os
 import logging
 
-logger = logging.getLogger("bob.pad.base")
-
 from .FileSelector import FileSelector
 from bob.bio.base import utils
+from bob.bio.base.tools import read_preprocessed_data
+
+logger = logging.getLogger("bob.pad.base")
 
 
 def preprocess(preprocessor, groups=None, indices=None, allow_missing_files=False, force=False):
@@ -26,7 +27,7 @@ def preprocess(preprocessor, groups=None, indices=None, allow_missing_files=Fals
 
     **Parameters:**
 
-    preprocessor : py:class:`bob.bio.base.preprocessor.Preprocessor` or derived.
+    preprocessor : :py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
         The preprocessor, which should be applied to all data.
 
     groups : some of ``('train', 'dev', 'eval')`` or ``None``
@@ -36,67 +37,76 @@ def preprocess(preprocessor, groups=None, indices=None, allow_missing_files=Fals
         If specified, only the data for the given index range ``range(begin, end)`` should be preprocessed.
         This is usually given, when parallel threads are executed.
 
+    allow_missing_files : bool
+        If set to ``True``, files for which the preprocessor returns ``None`` are silently ignored.
+
     force : bool
         If given, files are regenerated, even if they already exist.
     """
+    if not preprocessor.writes_data:
+        # The preprocessor does not write anything, so no need to call it
+        logger.info(
+            "Skipping preprocessing as preprocessor does not write any data")
+        return
+
     # the file selector object
     fs = FileSelector.instance()
 
     # get the file lists
-    data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups)
+    data_files = fs.original_data_list(groups=groups)
+    original_directory, original_extension = fs.original_directory_and_extension()
     preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
 
     # select a subset of keys to iterate
     if indices is not None:
         index_range = range(indices[0], indices[1])
-        logger.info("- Preprocessing: splitting of index range %s", str(indices))
+        logger.info(
+            "- Preprocessing: splitting of index range %s", str(indices))
     else:
         index_range = range(len(data_files))
 
-    logger.info("- Preprocessing: processing %d data files from directory '%s' to directory '%s'", len(index_range),
-                fs.directories['original'], fs.directories['preprocessed'])
+    logger.info("- Preprocessing: processing %d data files from directory '%s' to directory '%s'",
+                len(index_range), fs.directories['original'], fs.directories['preprocessed'])
+
+    # read annotation files
+    annotation_list = fs.annotation_list(groups=groups)
 
     # iterate over the selected files
     for i in index_range:
-        preprocessed_data_file = str(preprocessed_data_files[i])
+        preprocessed_data_file = preprocessed_data_files[i]
         file_object = data_files[i]
-        file_name = file_object.make_path(original_directory, original_extension)
+        file_name = file_object.make_path(
+            original_directory, original_extension)
 
         # check for existence
-        if not utils.check_file(preprocessed_data_file, force, 1000):
-            logger.info("... Processing original data file '%s'", file_name)
-            data = preprocessor.read_original_data(file_object, original_directory, original_extension)
-            # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
-            bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
+        if not utils.check_file(preprocessed_data_file, force,
+                                preprocessor.min_preprocessed_file_size):
+            logger.debug("... Processing original data file '%s'", file_name)
+            data = preprocessor.read_original_data(
+                file_object, original_directory, original_extension)
+            # create output directory before reading the data file (is
+            # sometimes required, when relative directories are specified,
+            # especially, including a .. somewhere)
+            bob.io.base.create_directories_safe(
+                os.path.dirname(preprocessed_data_file))
+
+            # get the annotations; might be None
+            annotations = fs.get_annotations(annotation_list[i])
 
             # call the preprocessor
-            preprocessed_data = preprocessor(data, None)
+            preprocessed_data = preprocessor(data, annotations)
             if preprocessed_data is None:
-                logger.error("Preprocessing of file '%s' was not successful", file_name)
-                continue
+                if allow_missing_files:
+                    logger.debug(
+                        "... Preprocessing of data file '%s' was not successful; skipping", file_name)
+                    continue
+                else:
+                    raise RuntimeError(
+                        "Preprocessing of file '%s' was not successful" % file_name)
 
             # write the data
             preprocessor.write_data(preprocessed_data, preprocessed_data_file)
-
-
-def read_preprocessed_data(file_names, preprocessor):
-    """read_preprocessed_data(file_names, preprocessor, split_by_client = False) -> preprocessed
-
-    Reads the preprocessed data from ``file_names`` using the given preprocessor.
-    If ``split_by_client`` is set to ``True``, it is assumed that the ``file_names`` are already sorted by client.
-
-    **Parameters:**
-
-    file_names : [str] or [[str]]
-        A list of names of files to be read.
-        If ``split_by_client = True``, file names are supposed to be split into groups.
-
-    preprocessor : py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
-        The preprocessor, which can read the preprocessed data.
-
-    **Returns:**
-
-    preprocessed : [object] or [[object]]
-        The list of preprocessed data, in the same order as in the ``file_names``.
-    """
-    return [preprocessor.read_data(str(f)) for f in file_names]
+        else:
+            logger.debug("... Skipping original data file '%s' since preprocessed data '%s' exists",
+                         file_name, preprocessed_data_file)
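
With these changes the preprocessor receives the annotations fetched through the ``FileSelector`` instead of a hard-coded ``None``, and may itself return ``None`` to mark a sample as failed. A minimal sketch of a preprocessor cooperating with this flow (the landmark-based cropping is hypothetical):

    from bob.bio.base.preprocessor import Preprocessor

    class AnnotationAwarePreprocessor(Preprocessor):

        def __call__(self, data, annotations=None):
            # annotations is the dictionary returned by database.annotations(),
            # or None if the database provides no annotations
            if data is None:
                # returning None marks the sample as failed; preprocess()
                # above then either skips it (allow_missing_files=True) or
                # raises a RuntimeError
                return None
            if annotations is not None:
                pass  # e.g. crop the image around the annotated landmarks
            return data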