Commit d8e1fb4d authored by Tiago de Freitas Pereira

[sphinx] Checkpoint

parent 940c6b3d

.. _checkpoint:

=====================
Checkpointing Samples
=====================
This mechanism allows the checkpointing of :py:class:`bob.pipelines.sample.Sample` objects during the processing of a :py:class:`sklearn.pipeline.Pipeline`, using `HDF5 <https://www.hdfgroup.org/solutions/hdf5/>`_ files.

Very often, when processing a :py:class:`sklearn.pipeline.Pipeline` with big chunks of data, it is useful to checkpoint some steps of the pipeline to disk.
This is useful for several purposes:

- Reusing samples that are expensive to re-compute
- Inspecting algorithms

Scikit-learn has a caching mechanism that allows the caching of `estimators <https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline>`_ that can be used for such a purpose.
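For instance, a minimal sketch with stock scikit-learn estimators (the estimators and cache directory below are illustrative, not part of this package):

.. code:: python

   >>> from sklearn.pipeline import make_pipeline
   >>> from sklearn.preprocessing import StandardScaler
   >>> from sklearn.decomposition import PCA
   >>> # Fitted transformers are memoized on disk and reused on the next fit
   >>> pipeline = make_pipeline(
   ...     StandardScaler(), PCA(n_components=2), memory="./sklearn_cache"
   ... )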
Although useful, this caching mechanism is not user friendly.
As in :ref:`sample`, this can be approached with the :py:class:`bob.pipelines.mixins.CheckpointMixin` mixin, where a new class can be created either dynamically, with the :py:func:`bob.pipelines.mixins.mix_me_up` function:
.. code:: python

   >>> from bob.pipelines.mixins import CheckpointMixin, mix_me_up
   >>> MyTransformerCheckpoint = mix_me_up((CheckpointMixin,), MyTransformer)
or explicitly:
.. code:: python

   >>> from bob.pipelines.mixins import CheckpointMixin
   >>> class MyTransformerCheckpoint(CheckpointMixin, MyTransformer):
   ...     pass
Checkpointing a transformer
---------------------------
The code below repeats the example from :ref:`sample`, but now `MyTransformer` is checkpointed once `MyTransformer.transform` is executed.
.. literalinclude:: ./python/pipeline_example_boosted_checkpoint.py
   :linenos:
   :emphasize-lines: 23, 28, 34, 38
.. warning::

   In line 28, samples are created with the keyword argument `key`. The
   :py:class:`bob.pipelines.mixins.CheckpointMixin` uses this information for saving.
   The keyword argument `features_dir`, defined in lines 34 and 38, sets the base
   path where those samples will be saved.
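Since `key` determines where each sample is saved, every sample in a batch needs its own unique key. A minimal sketch of wrapping a small batch (the keys below are arbitrary):

.. code:: python

   >>> import numpy
   >>> from bob.pipelines.sample import Sample
   >>> X = numpy.zeros((2, 2))
   >>> # One unique `key` per sample; CheckpointMixin uses it to locate the saved sample
   >>> samples = [Sample(X, key=str(i), metadata=1) for i in range(3)]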
Checkpointing stateful transformers
-----------------------------------

Stateful transformers are transformers that implement both the `fit` and `transform` methods.
Those can be checkpointed too, as can be observed in the example below.
.. literalinclude:: ./python/pipeline_example_boosted_checkpoint_estimator.py
   :linenos:
   :emphasize-lines: 52-55
The keyword arguments `features_dir` and `model_path`, defined in lines 52 to 55, set the paths where the samples and the model trained during `fit` will be saved.
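Once `fit` has run, the trained state is persisted at `model_path`. As a hedged sketch, assuming the `.pkl` extension denotes a standard pickle file, the persisted model could be inspected like this:

.. code:: python

   >>> import pickle
   >>> # Load the state saved at `model_path` after fit (assumed to be a pickle)
   >>> with open("./checkpoint_2/model.pkl", "rb") as f:
   ...     fitted_state = pickle.load(f)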
conf.py:

@@ -48,7 +48,7 @@ todo_include_todos = True
 autosummary_generate = True

 # Create numbers on figures with captions
-numfig = True
+numfig = False

 # If we are on OSX, the 'dvipng' path maybe different
 dvipng_osx = "/Library/TeX/texbin/dvipng"
@@ -18,10 +18,11 @@ class MyTransformer(TransformerMixin, BaseEstimator):
     def _more_tags(self):
         return {"stateless": True, "requires_fit": False}

+
 # Mixing up MyTransformer with the capabilities of handling Samples
 MyBoostedTransformer = mix_me_up((SampleMixin,), MyTransformer)

-# Creating X
+# Creating X
 X = numpy.zeros((2, 2))

 # Wrapping X with Samples
 X_as_sample = Sample(X, metadata=1)
@@ -31,4 +32,4 @@ pipeline = make_pipeline(
     MyBoostedTransformer(transform_extra_arguments=(("metadata", "metadata"),)),
     MyBoostedTransformer(transform_extra_arguments=(("metadata", "metadata"),)),
 )
-X_transformed = pipeline.transform([X_as_sample])
\ No newline at end of file
+X_transformed = pipeline.transform([X_as_sample])
python/pipeline_example_boosted_checkpoint.py:

from bob.pipelines.sample import Sample
from bob.pipelines.mixins import SampleMixin, CheckpointMixin, mix_me_up
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.pipeline import make_pipeline
import numpy


class MyTransformer(TransformerMixin, BaseEstimator):
    def transform(self, X, metadata=None):
        # Transform `X` with metadata
        if metadata is None:
            return X
        return [x + m for x, m in zip(X, metadata)]

    def fit(self, X):
        return self

    def _more_tags(self):
        return {"stateless": True, "requires_fit": False}


# Mixing up MyTransformer with the capabilities of handling Samples AND checkpointing
MyBoostedTransformer = mix_me_up((CheckpointMixin, SampleMixin), MyTransformer)

# Creating X
X = numpy.zeros((2, 2))
# Wrapping X with Samples
X_as_sample = Sample(X, key="1", metadata=1)

# Building an arbitrary pipeline
pipeline = make_pipeline(
    MyBoostedTransformer(
        transform_extra_arguments=(("metadata", "metadata"),),
        features_dir="./checkpoint_1",
    ),
    MyBoostedTransformer(
        transform_extra_arguments=(("metadata", "metadata"),),
        features_dir="./checkpoint_2",
    ),
)
X_transformed = pipeline.transform([X_as_sample])
python/pipeline_example_boosted_checkpoint_estimator.py:

from bob.pipelines.sample import Sample
from bob.pipelines.mixins import SampleMixin, CheckpointMixin, mix_me_up
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.pipeline import make_pipeline
import numpy


class MyTransformer(TransformerMixin, BaseEstimator):
    def transform(self, X, metadata=None):
        # Transform `X` with metadata
        if metadata is None:
            return X
        return [x + m for x, m in zip(X, metadata)]

    def fit(self, X):
        return self

    def _more_tags(self):
        return {"stateless": True, "requires_fit": False}


class MyFitTransformer(TransformerMixin, BaseEstimator):
    def __init__(self, *args, **kwargs):
        self._fit_model = None
        super().__init__(*args, **kwargs)

    def transform(self, X):
        # Project each sample on the fitted model
        return [x @ self._fit_model for x in X]

    def fit(self, X):
        self._fit_model = numpy.array([[1, 2], [3, 4]])
        return self


# Mixing up both transformers with the capabilities of handling Samples AND checkpointing
MyBoostedTransformer = mix_me_up((CheckpointMixin, SampleMixin), MyTransformer)
MyBoostedFitTransformer = mix_me_up((CheckpointMixin, SampleMixin), MyFitTransformer)

# Creating X
X = numpy.zeros((2, 2))
# Wrapping X with Samples
X_as_sample = Sample(X, key="1", metadata=1)

# Building an arbitrary pipeline
pipeline = make_pipeline(
    MyBoostedTransformer(
        transform_extra_arguments=(("metadata", "metadata"),),
        features_dir="./checkpoint_1",
    ),
    MyBoostedFitTransformer(
        features_dir="./checkpoint_2",
        model_path="./checkpoint_2/model.pkl",
    ),
)
X_transformed = pipeline.fit_transform([X_as_sample])