Commit 4151b396 authored by Amir MOHAMMADI
use 4 spaces. Implement IO for sequential preprocessors and extractors

parent 8b031ce9
Merge request !102: Add sequential and parallel processors, pre-processors, and extractors
@@ -4,133 +4,141 @@ from bob.io.base import HDF5File
class MultipleExtractor(Extractor):
    """Base class for SequentialExtractor and ParallelExtractor. This class is
    not meant to be used directly."""

    def get_attributes(self, processors):
        requires_training = any(p.requires_training for p in processors)
        split_training_data_by_client = any(
            p.split_training_data_by_client for p in processors)
        min_extractor_file_size = min(
            p.min_extractor_file_size for p in processors)
        min_feature_file_size = min(
            p.min_feature_file_size for p in processors)
        return (requires_training, split_training_data_by_client,
                min_extractor_file_size, min_feature_file_size)

    def get_extractor_groups(self):
        groups = ['E_{}'.format(i + 1) for i in range(len(self.processors))]
        return groups

    def train_one(self, e, training_data, extractor_file, apply=False):
        """Trains one extractor and, if requested, applies it to the training
        data, returning the transformed data for the next extractor."""
        if not e.requires_training:
            if not apply:
                return
            # the extractor needs no training, but the data must still pass
            # through it so that the next extractor trains on its output.
            if self.split_training_data_by_client:
                return [[e(d) for d in datalist]
                        for datalist in training_data]
            return [e(d) for d in training_data]
        # if any of the extractors requires the data to be split by client,
        # self.split_training_data_by_client is True.
        if e.split_training_data_by_client:
            e.train(training_data, extractor_file)
            if not apply:
                return
            training_data = [[e(d) for d in datalist]
                             for datalist in training_data]
        # when no extractor needs splitting
        elif not self.split_training_data_by_client:
            e.train(training_data, extractor_file)
            if not apply:
                return
            training_data = [e(d) for d in training_data]
        # when e wants the data flat but the data is split by client
        else:
            # make training_data flat
            training_data_len = [len(datalist) for datalist in training_data]
            training_data = [d for datalist in training_data for d in datalist]
            e.train(training_data, extractor_file)
            if not apply:
                return
            # split the training data back by client
            new_training_data, i = [], 0
            for length in training_data_len:
                class_data = []
                for _ in range(length):
                    class_data.append(e(training_data[i]))
                    i += 1
                new_training_data.append(class_data)
            training_data = new_training_data
        return training_data

    def load(self, extractor_file):
        with HDF5File(extractor_file) as f:
            groups = self.get_extractor_groups()
            for e, group in zip(self.processors, groups):
                f.cd(group)
                e.load(f)
                f.cd('..')
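For illustration, the two training-data layouts that train_one converts between can be shown in a minimal, self-contained sketch; the variable names below are illustrative only, not part of the API:

import numpy

# Flat layout: a single list of samples; used when no extractor in the
# stack requires the data to be split by client.
flat_data = [numpy.ones(5), numpy.zeros(5), numpy.arange(5)]

# Split-by-client layout: one list of samples per client; used when any
# extractor has split_training_data_by_client set.
split_data = [[numpy.ones(5), numpy.zeros(5)],  # client 1
              [numpy.arange(5)]]                # client 2

# Flattening and re-splitting, mirroring the last branch of train_one:
lengths = [len(datalist) for datalist in split_data]
flat = [d for datalist in split_data for d in datalist]
resplit, i = [], 0
for length in lengths:
    resplit.append(flat[i:i + length])
    i += length
assert [len(c) for c in resplit] == lengths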
class SequentialExtractor(SequentialProcessor, MultipleExtractor):
    __doc__ = SequentialProcessor.__doc__

    def __init__(self, processors):
        (requires_training, split_training_data_by_client,
         min_extractor_file_size, min_feature_file_size) = \
            self.get_attributes(processors)

        super(SequentialExtractor, self).__init__(
            processors=processors,
            requires_training=requires_training,
            split_training_data_by_client=split_training_data_by_client,
            min_extractor_file_size=min_extractor_file_size,
            min_feature_file_size=min_feature_file_size)

    def train(self, training_data, extractor_file):
        with HDF5File(extractor_file, 'w') as f:
            groups = self.get_extractor_groups()
            for e, group in zip(self.processors, groups):
                f.create_group(group)
                f.cd(group)
                training_data = self.train_one(e, training_data, f,
                                               apply=True)
                f.cd('..')

    def read_feature(self, feature_file):
        return self.processors[-1].read_feature(feature_file)

    def write_feature(self, feature, feature_file):
        return self.processors[-1].write_feature(feature, feature_file)
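A possible usage sketch, assuming the classes are exported from bob.bio.base.extractor as in this merge request (the normalization lambda is just an example):

import numpy
from bob.bio.base.extractor import SequentialExtractor, CallableExtractor

# Chain two stateless extractors; neither requires training, so the
# stack's requires_training attribute is False.
extractor = SequentialExtractor([
    CallableExtractor(numpy.ravel),
    CallableExtractor(lambda x: x / numpy.linalg.norm(x)),
])

# __call__ comes from SequentialProcessor: ravel first, then normalize.
features = extractor(numpy.random.rand(4, 4))
assert features.shape == (16,)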
class ParallelExtractor(ParallelProcessor, MultipleExtractor):
    __doc__ = ParallelProcessor.__doc__

    def __init__(self, processors):
        (requires_training, split_training_data_by_client,
         min_extractor_file_size, min_feature_file_size) = \
            self.get_attributes(processors)

        super(ParallelExtractor, self).__init__(
            processors=processors,
            requires_training=requires_training,
            split_training_data_by_client=split_training_data_by_client,
            min_extractor_file_size=min_extractor_file_size,
            min_feature_file_size=min_feature_file_size)

    def train(self, training_data, extractor_file):
        with HDF5File(extractor_file, 'w') as f:
            groups = self.get_extractor_groups()
            for e, group in zip(self.processors, groups):
                f.create_group(group)
                f.cd(group)
                self.train_one(e, training_data, f, apply=False)
                f.cd('..')
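Similarly for the parallel variant, a short sketch under the same import-path assumption as above:

import numpy
from bob.bio.base.extractor import ParallelExtractor, CallableExtractor

# Both callables see the same input; ParallelProcessor.__call__ then
# hstacks the two scalar outputs into a single feature vector.
extractor = ParallelExtractor([
    CallableExtractor(numpy.mean),
    CallableExtractor(numpy.std),
])
features = extractor(numpy.array([0.0, 1.0, 2.0]))  # array([1., 0.8164...])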
class CallableExtractor(Extractor):
    """A simple extractor that takes a callable and applies that callable to
    the input.

    Attributes
    ----------
    callable : object
        Anything that is callable. It will be used as an extractor in
        bob.bio.base.
    """

    def __init__(self, callable, **kwargs):
        super(CallableExtractor, self).__init__(**kwargs)
        self.callable = callable

    def __call__(self, data):
        return self.callable(data)
@@ -3,62 +3,71 @@ from .Preprocessor import Preprocessor
class SequentialPreprocessor(SequentialProcessor, Preprocessor):
    __doc__ = SequentialProcessor.__doc__

    def __init__(self, processors, **kwargs):
        min_preprocessed_file_size = 1000
        try:
            min_preprocessed_file_size = min(
                p.min_preprocessed_file_size for p in processors)
        except AttributeError:
            pass

        SequentialProcessor.__init__(self, processors)
        Preprocessor.__init__(
            self, min_preprocessed_file_size=min_preprocessed_file_size,
            **kwargs)

    def __call__(self, data, annotations):
        return super(SequentialPreprocessor, self).__call__(
            data, annotations=annotations)

    def read_data(self, data_file):
        return self.processors[-1].read_data(data_file)

    def write_data(self, data, data_file):
        return self.processors[-1].write_data(data, data_file)
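A usage sketch for the sequential preprocessor, again assuming the bob.bio.base.preprocessor import path from this merge request:

import numpy
from bob.bio.base.preprocessor import (
    SequentialPreprocessor, CallablePreprocessor)

# Neither step uses annotations, so accepts_annotations is disabled.
preprocessor = SequentialPreprocessor([
    CallablePreprocessor(numpy.abs, accepts_annotations=False),
    CallablePreprocessor(lambda x: x * 2, accepts_annotations=False),
])
data = preprocessor(numpy.array([-1.0, 2.0]), annotations=None)  # [2., 4.]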
class ParallelPreprocessor(ParallelProcessor, Preprocessor):
    __doc__ = ParallelProcessor.__doc__

    def __init__(self, processors, **kwargs):
        min_preprocessed_file_size = min(
            p.min_preprocessed_file_size for p in processors)

        ParallelProcessor.__init__(self, processors)
        Preprocessor.__init__(
            self, min_preprocessed_file_size=min_preprocessed_file_size,
            **kwargs)

    def __call__(self, data, annotations):
        return super(ParallelPreprocessor, self).__call__(
            data, annotations=annotations)
class CallablePreprocessor(Preprocessor):
    """A simple preprocessor that takes a callable and applies that callable
    to the input.

    Attributes
    ----------
    accepts_annotations : bool
        If False, annotations are not passed to the callable.
    callable : object
        Anything that is callable. It will be used as a preprocessor in
        bob.bio.base.
    """

    def __init__(self, callable, accepts_annotations=True, **kwargs):
        super(CallablePreprocessor, self).__init__(
            callable=callable, accepts_annotations=accepts_annotations,
            **kwargs)
        self.callable = callable
        self.accepts_annotations = accepts_annotations

    def __call__(self, data, annotations):
        if self.accepts_annotations:
            return self.callable(data, annotations)
        else:
            return self.callable(data)
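When the callable does use annotations, they are passed through as its second argument; a sketch with a hypothetical crop function (the 'top' annotation key is made up for illustration):

from bob.bio.base.preprocessor import CallablePreprocessor

def crop(data, annotations):
    # hypothetical preprocessor: drop samples before the 'top' mark
    top = annotations['top'] if annotations else 0
    return data[top:]

preprocessor = CallablePreprocessor(crop)  # accepts_annotations=True
assert preprocessor([1, 2, 3, 4], {'top': 2}) == [3, 4]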
@@ -2,77 +2,77 @@ import numpy
class SequentialProcessor(object):
    """A helper class which takes several processors and applies them one by
    one, sequentially.

    Attributes
    ----------
    processors : list
        A list of processors to apply.
    """

    def __init__(self, processors, **kwargs):
        super(SequentialProcessor, self).__init__()
        self.processors = processors

    def __call__(self, data, **kwargs):
        """Applies the processors on the data sequentially. The output of the
        first one goes as input to the next one.

        Parameters
        ----------
        data : object
            The data that needs to be processed.
        **kwargs
            Any kwargs are passed to the processors.

        Returns
        -------
        object
            The processed data.
        """
        for processor in self.processors:
            data = processor(data, **kwargs)
        return data
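Since SequentialProcessor is a plain helper, it can be exercised with any callables; a minimal sketch (the utils.processors import path is an assumption based on this file):

import numpy
from bob.bio.base.utils.processors import SequentialProcessor

# flatten, sort ascending, then reverse: each output feeds the next.
pipeline = SequentialProcessor([numpy.ravel, numpy.sort, numpy.flipud])
print(pipeline(numpy.array([[3, 1], [2, 4]])))  # -> [4 3 2 1]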
class ParallelProcessor(object):
    """A helper class which takes several processors, applies each of them
    separately to the same input, and returns the list of their outputs.

    Attributes
    ----------
    processors : list
        A list of processors to apply.
    stack : bool
        If True (default), :any:`numpy.hstack` is called on the list of
        outputs.
    """

    def __init__(self, processors, stack=True, **kwargs):
        super(ParallelProcessor, self).__init__()
        self.processors = processors
        self.stack = stack

    def __call__(self, data, **kwargs):
        """Applies the processors on the data independently and outputs a
        list of their outputs.

        Parameters
        ----------
        data : object
            The data that needs to be processed.
        **kwargs
            Any kwargs are passed to the processors.

        Returns
        -------
        object
            The processed data.
        """
        output = []
        for processor in self.processors:
            out = processor(data, **kwargs)
            output.append(out)
        if self.stack:
            output = numpy.hstack(output)
        return output
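And the effect of the stack flag, under the same import-path assumption:

import numpy
from bob.bio.base.utils.processors import ParallelProcessor

data = numpy.arange(4.0)
# stack=True (default): the two 4-element outputs are hstacked into 8.
stacked = ParallelProcessor([numpy.cos, numpy.sin])(data)
assert stacked.shape == (8,)
# stack=False: the raw list of per-processor outputs is returned.
as_list = ParallelProcessor([numpy.cos, numpy.sin], stack=False)(data)
assert len(as_list) == 2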