diff --git a/bob/bio/base/extractor/stacks.py b/bob/bio/base/extractor/stacks.py
index 511b7d04106cbf62af745bdaf6447fc9fa7a3a89..8bc5aee5db9f755fc6520d5b68b764c9ae811e50 100644
--- a/bob/bio/base/extractor/stacks.py
+++ b/bob/bio/base/extractor/stacks.py
@@ -1,5 +1,6 @@
 from ..utils.processors import SequentialProcessor, ParallelProcessor
 from .Extractor import Extractor
+from bob.io.base import HDF5File
 
 
 class MultipleExtractor(Extractor):
@@ -8,34 +9,38 @@ class MultipleExtractor(Extractor):
 
   def get_attributes(self, processors):
     requires_training = any((p.requires_training for p in processors))
-    split_training_data_by_client = any(
-        (p.split_training_data_by_client for p in processors))
-    min_extractor_file_size = min(
-        (p.min_extractor_file_size for p in processors))
-    min_feature_file_size = min(
-        (p.min_feature_file_size for p in processors))
+    split_training_data_by_client = any(
+        p.split_training_data_by_client for p in processors)
+    min_extractor_file_size = min(
+        p.min_extractor_file_size for p in processors)
+    min_feature_file_size = min(p.min_feature_file_size for p in processors)
     return (requires_training, split_training_data_by_client,
             min_extractor_file_size, min_feature_file_size)
 
-  def get_extractor_files(self, extractor_file):
-    paths = [extractor_file]
-    paths += [extractor_file +
-              '_{}.hdf5'.format(i) for i in range(1, len(self.processors))]
-    return paths
+  def get_extractor_groups(self):
+    groups = ['E_{}'.format(i + 1) for i in range(len(self.processors))]
+    return groups
 
   def train_one(self, e, training_data, extractor_file, apply=False):
-    if not e.requires_training:
-      return
+    if not e.requires_training:
+      # still apply e so that the next extractor trains on its output
+      if apply and self.split_training_data_by_client:
+        return [[e(d) for d in datalist] for datalist in training_data]
+      return [e(d) for d in training_data] if apply else None
+    # if any of the extractors requires its training data to be split
+    # by client, self.split_training_data_by_client is True.
     if e.split_training_data_by_client:
       e.train(training_data, extractor_file)
       if not apply:
         return
       training_data = [[e(d) for d in datalist] for datalist in training_data]
+    # when no extractor needs the data split by client
     elif not self.split_training_data_by_client:
       e.train(training_data, extractor_file)
       if not apply:
         return
       training_data = [e(d) for d in training_data]
+    # when e wants flat data but the data arrives split by client
     else:
       # make training_data flat
       training_data_len = [len(datalist) for datalist in training_data]
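
A toy illustration, not part of the patch, of the bookkeeping this branch
presumably performs in the elided lines: the client-split data is flattened
for training, and the recorded per-client lengths allow regrouping the flat
results afterwards. With made-up data:

  training_data = [['a1', 'a2'], ['b1']]                # split by client
  training_data_len = [len(d) for d in training_data]   # [2, 1]
  flat = [d for datalist in training_data for d in datalist]
  # ... e.train(flat, ...) runs on the flat list ...
  it = iter(flat)
  resplit = [[next(it) for _ in range(n)] for n in training_data_len]
  assert resplit == training_data
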
@@ -55,9 +60,12 @@
     return training_data
 
   def load(self, extractor_file):
-    paths = self.get_extractor_files(extractor_file)
-    for e, path in zip(self.processors, paths):
-      e.load(path)
+    with HDF5File(extractor_file) as f:
+      groups = self.get_extractor_groups()
+      for e, group in zip(self.processors, groups):
+        f.cd(group)
+        e.load(f)
+        f.cd('..')
 
 
 class SequentialExtractor(SequentialProcessor, MultipleExtractor):
@@ -66,21 +74,24 @@ class SequentialExtractor(SequentialProcessor, MultipleExtractor):
   def __init__(self, processors):
 
     (requires_training, split_training_data_by_client,
      min_extractor_file_size, min_feature_file_size) = self.get_attributes(
         processors)
 
-    SequentialProcessor.__init__(self, processors)
-    MultipleExtractor.__init__(
-        self,
+    super(SequentialExtractor, self).__init__(
+        processors=processors,
         requires_training=requires_training,
         split_training_data_by_client=split_training_data_by_client,
         min_extractor_file_size=min_extractor_file_size,
         min_feature_file_size=min_feature_file_size)
 
   def train(self, training_data, extractor_file):
-    paths = self.get_extractor_files(extractor_file)
-    for e, path in zip(self.processors, paths):
-      training_data = self.train_one(e, training_data, path, apply=True)
+    with HDF5File(extractor_file, 'w') as f:
+      groups = self.get_extractor_groups()
+      for e, group in zip(self.processors, groups):
+        f.create_group(group)
+        f.cd(group)
+        training_data = self.train_one(e, training_data, f, apply=True)
+        f.cd('..')
 
 
 class ParallelExtractor(ParallelProcessor, MultipleExtractor):
@@ -92,18 +103,21 @@ class ParallelExtractor(ParallelProcessor, MultipleExtractor):
      min_extractor_file_size, min_feature_file_size) = self.get_attributes(
         processors)
 
-    ParallelProcessor.__init__(self, processors)
-    MultipleExtractor.__init__(
-        self,
+    super(ParallelExtractor, self).__init__(
+        processors=processors,
         requires_training=requires_training,
         split_training_data_by_client=split_training_data_by_client,
         min_extractor_file_size=min_extractor_file_size,
         min_feature_file_size=min_feature_file_size)
 
   def train(self, training_data, extractor_file):
-    paths = self.get_extractor_files(extractor_file)
-    for e, path in zip(self.processors, paths):
-      self.train_one(e, training_data, path)
+    with HDF5File(extractor_file, 'w') as f:
+      groups = self.get_extractor_groups()
+      for e, group in zip(self.processors, groups):
+        f.create_group(group)
+        f.cd(group)
+        self.train_one(e, training_data, f, apply=False)
+        f.cd('..')
 
 
 class CallableExtractor(Extractor):
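
For orientation, a minimal sketch of the new on-disk layout, not part of
the patch and with a hypothetical file name: all sub-extractor models now
live in a single HDF5 file, one group per extractor, instead of one file
per extractor as before.

  from bob.io.base import HDF5File

  # 'extractor.hdf5' as written by training a two-element SequentialExtractor
  with HDF5File('extractor.hdf5') as f:
    assert f.has_group('E_1') and f.has_group('E_2')
    f.cd('E_1')  # MultipleExtractor.load() enters each group in turn
    # ... the first sub-extractor's e.load(f) reads its model here ...
    f.cd('..')
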
diff --git a/bob/bio/base/preprocessor/stacks.py b/bob/bio/base/preprocessor/stacks.py
index 1c8be13db4a6e48a1a396bdca0e207360504a0be..af96a736f02dc39b8deaaa25dd072b79e0428a8c 100644
--- a/bob/bio/base/preprocessor/stacks.py
+++ b/bob/bio/base/preprocessor/stacks.py
@@ -26,12 +26,8 @@ class ParallelPreprocessor(ParallelProcessor, Preprocessor):
   __doc__ = ParallelProcessor.__doc__
 
   def __init__(self, processors, **kwargs):
-    min_preprocessed_file_size = 1000
-    try:
-      min_preprocessed_file_size = min(
-          (p.min_preprocessed_file_size for p in processors))
-    except AttributeError:
-      pass
+    min_preprocessed_file_size = min(
+        p.min_preprocessed_file_size for p in processors)
 
     ParallelProcessor.__init__(self, processors)
     Preprocessor.__init__(
@@ -48,14 +44,21 @@ class CallablePreprocessor(Preprocessor):
 
   Attributes
   ----------
+  accepts_annotations : bool
+      If False, annotations are not passed to the callable.
   callable : object
       Anything that is callable. It will be used as a preprocessor in
       bob.bio.base.
   """
 
-  def __init__(self, callable, **kwargs):
-    super(CallablePreprocessor, self).__init__(**kwargs)
+  def __init__(self, callable, accepts_annotations=True, **kwargs):
+    super(CallablePreprocessor, self).__init__(
+        callable=callable, accepts_annotations=accepts_annotations, **kwargs)
     self.callable = callable
+    self.accepts_annotations = accepts_annotations
 
   def __call__(self, data, annotations):
-    return self.callable(data)
+    if self.accepts_annotations:
+      return self.callable(data, annotations)
+    else:
+      return self.callable(data)
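
A short usage sketch of the new flag, with made-up callables, assuming the
stacks classes are exported from bob.bio.base.preprocessor: a callable that
only takes data opts out of annotations, while an annotation-aware callable
keeps the default.

  import numpy as np
  from bob.bio.base.preprocessor import CallablePreprocessor

  # this callable ignores annotations, so opt out explicitly
  scale = CallablePreprocessor(lambda data: data / 255.,
                               accepts_annotations=False)

  # hypothetical callable that consumes the annotations argument
  def crop(data, annotations):
    return data if annotations is None else data[annotations['top']:]

  cropper = CallablePreprocessor(crop)
  data = np.arange(10, dtype=float)
  scale(data, None)           # calls the callable with data only
  cropper(data, {'top': 2})   # passes the annotations through
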
diff --git a/bob/bio/base/test/test_stacks.py b/bob/bio/base/test/test_stacks.py
index 3db57f56b14e74cd71584603c01d623f3d212166..3628902de8f39dba21d79e4a5a0498b940cd22e6 100644
--- a/bob/bio/base/test/test_stacks.py
+++ b/bob/bio/base/test/test_stacks.py
@@ -24,7 +24,8 @@
 
 
 def test_preprocessors():
-  processors = [CallablePreprocessor(p) for p in PROCESSORS]
+  processors = [CallablePreprocessor(p, accepts_annotations=False)
+                for p in PROCESSORS]
   proc = SequentialPreprocessor(processors)
   data = proc(DATA, None)
   assert np.allclose(data, SEQ_DATA)
diff --git a/bob/bio/base/utils/processors.py b/bob/bio/base/utils/processors.py
index 64549d767fd07dacb5d7de329ee7f604472d5dee..03dd2e98cf8f6eb12d7e6b69cdfccc55af6e717c 100644
--- a/bob/bio/base/utils/processors.py
+++ b/bob/bio/base/utils/processors.py
@@ -11,7 +11,7 @@ class SequentialProcessor(object):
       A list of processors to apply.
   """
 
-  def __init__(self, processors):
-    super(SequentialProcessor, self).__init__()
+  def __init__(self, processors, **kwargs):
+    super(SequentialProcessor, self).__init__(**kwargs)
     self.processors = processors
 
@@ -32,10 +32,7 @@ class SequentialProcessor(object):
         The processed data.
     """
     for processor in self.processors:
-      try:
-        data = processor(data, **kwargs)
-      except ValueError:
-        data = processor(data)
+      data = processor(data, **kwargs)
     return data
 
 
@@ -51,7 +48,7 @@ class ParallelProcessor(object):
       If True (default), :any:`numpy.hstack` is called on the list of outputs.
   """
 
-  def __init__(self, processors, stack=True):
-    super(ParallelProcessor, self).__init__()
+  def __init__(self, processors, stack=True, **kwargs):
+    super(ParallelProcessor, self).__init__(**kwargs)
     self.processors = processors
     self.stack = stack
@@ -74,10 +71,7 @@ class ParallelProcessor(object):
     """
     output = []
     for processor in self.processors:
-      try:
-        out = processor(data, **kwargs)
-      except ValueError:
-        out = processor(data)
+      out = processor(data, **kwargs)
       output.append(out)
     if self.stack:
       output = numpy.hstack(output)
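
With the try/except fallback gone, every processor in a chain now receives
the same keyword arguments; opting out is the job of a wrapper such as
CallablePreprocessor above, not of these utilities. A minimal sketch with
made-up processors:

  import numpy as np
  from bob.bio.base.utils.processors import (SequentialProcessor,
                                             ParallelProcessor)

  def double(x):
    return x * 2.0

  def add_one(x):
    return x + 1.0

  seq = SequentialProcessor([double, add_one])
  par = ParallelProcessor([double, add_one])

  data = np.ones(3)
  seq(data)  # double, then add_one: [3. 3. 3.]
  par(data)  # hstack of both outputs: [2. 2. 2. 2. 2. 2.]

Any keyword given to these calls is now forwarded to every processor
unchanged; a processor that cannot accept it raises immediately instead of
being silently called without it.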