diff --git a/bob/learn/boosting/ExponentialLoss.py b/bob/learn/boosting/ExponentialLoss.py
index 4fb64045ff321229982b1d80230a90a1f2fe3b52..2a3915dfa802af0c6bcefbfc8fbf64ed9bf1cb6a 100644
--- a/bob/learn/boosting/ExponentialLoss.py
+++ b/bob/learn/boosting/ExponentialLoss.py
@@ -1,39 +1,36 @@
-from .LossFunction import LossFunction
-
 import numpy
+from . import LossFunction
 
-class ExponentialLoss (LossFunction):
-  """ The class implements the exponential loss function for the boosting framework."""
-
-
-  def loss(self, targets, scores):
-    """The function computes the exponential loss values using prediction scores and targets.
-    It can be used in classification tasks, e.g., in combination with the StumpTrainer.
 
-    Keyword parameters:
+class ExponentialLoss(LossFunction):
+    """ The class implements the exponential loss function for the boosting framework."""
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+    def loss(self, targets, scores):
+        """The function computes the exponential loss values using prediction scores and targets.
+        It can be used in classification tasks, e.g., in combination with the StumpTrainer.
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+        Keyword parameters:
 
-    Returns
-      (float <#samples, #outputs>): The loss values for the samples, always >= 0
-    """
-    return numpy.exp(-(targets * scores))
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
-  def loss_gradient(self, targets, scores):
-    """The function computes the gradient of the exponential loss function using prediction scores and targets.
+        Returns
+          (float <#samples, #outputs>): The loss values for the samples, always >= 0
+        """
+        return numpy.exp(-(targets * scores))
 
-    Keyword parameters:
+    def loss_gradient(self, targets, scores):
+        """The function computes the gradient of the exponential loss function using prediction scores and targets.
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+        Keyword parameters:
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
-    Returns
-      loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
-    """
-    loss = numpy.exp(-(targets * scores))
-    return -targets * loss
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
+        Returns
+          loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
+        """
+        loss = numpy.exp(-(targets * scores))
+        return -targets * loss
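
As a quick sanity check of the refactored loss (illustrative only; it assumes the package imports work as wired up in __init__.py further down this patch), the shapes and the >= 0 property promised by the docstring are easy to verify:

    import numpy
    from bob.learn.boosting import ExponentialLoss

    loss = ExponentialLoss()
    targets = numpy.array([[+1.], [-1.], [+1.]])   # <#samples, #outputs>
    scores  = numpy.array([[0.8], [0.3], [-0.5]])  # classifier predictions

    values    = loss.loss(targets, scores)           # exp(-targets * scores), always >= 0
    gradients = loss.loss_gradient(targets, scores)  # -targets * exp(-targets * scores)
    assert values.shape == targets.shape and (values >= 0).all()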
diff --git a/bob/learn/boosting/LogitLoss.py b/bob/learn/boosting/LogitLoss.py
index 4b91eaed099731e6b199c4a44006d337837e7005..635ed9ea805a6502b6ddd521c22462677fb0d473 100644
--- a/bob/learn/boosting/LogitLoss.py
+++ b/bob/learn/boosting/LogitLoss.py
@@ -1,38 +1,38 @@
-from .LossFunction import LossFunction
+from . import LossFunction
 
 import numpy
 
-class LogitLoss(LossFunction):
-  """ The class to implement the logit loss function for the boosting framework."""
 
-  def loss(self, targets, scores):
-    """The function computes the logit loss values using prediction scores and targets.
+class LogitLoss(LossFunction):
+    """ The class to implement the logit loss function for the boosting framework."""
 
-    Keyword parameters:
+    def loss(self, targets, scores):
+        """The function computes the logit loss values using prediction scores and targets.
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+        Keyword parameters:
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
-    Returns
-      (float <#samples, #outputs>): The loss values for the samples, which is always >= 0
-    """
-    e = numpy.exp(-(targets * scores))
-    return numpy.log(1. + e)
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
+        Returns
+          (float <#samples, #outputs>): The loss values for the samples, which is always >= 0
+        """
+        e = numpy.exp(-(targets * scores))
+        return numpy.log(1. + e)
 
-  def loss_gradient(self, targets, scores):
-    """The function computes the gradient of the logit loss function using prediction scores and targets.
+    def loss_gradient(self, targets, scores):
+        """The function computes the gradient of the logit loss function using prediction scores and targets.
 
-    Keyword parameters:
+        Keyword parameters:
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
-    Returns
-      loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
-    """
-    e = numpy.exp(-(targets * scores))
-    denom = 1./(1. + e)
-    return -targets * e * denom
+        Returns
+          loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
+        """
+        e = numpy.exp(-(targets * scores))
+        denom = 1. / (1. + e)
+        return -targets * e * denom
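
The gradient above can be cross-checked against a finite-difference approximation of loss(); a minimal sketch of such a check (not part of the patch, and equally applicable to the other loss classes):

    import numpy
    from bob.learn.boosting import LogitLoss

    loss = LogitLoss()
    targets = numpy.array([[+1.], [-1.]])
    scores  = numpy.array([[0.2], [1.5]])
    eps = 1e-6

    analytic = loss.loss_gradient(targets, scores)
    numeric  = (loss.loss(targets, scores + eps) - loss.loss(targets, scores - eps)) / (2. * eps)
    assert numpy.allclose(analytic, numeric, atol=1e-5)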
diff --git a/bob/learn/boosting/LossFunction.py b/bob/learn/boosting/LossFunction.py
index 4122ef2e93187e98e7207af873dc4865c460f23d..05f4c40a35ca467571dae93a86eb59b3c77fbb1b 100644
--- a/bob/learn/boosting/LossFunction.py
+++ b/bob/learn/boosting/LossFunction.py
@@ -1,97 +1,98 @@
 import numpy
 
-class LossFunction:
-  """This is a base class for all loss functions implemented in pure python.
-  It is simply a python re-implementation of the :py:class:`bob.learn.boosting.LossFunction` class.
 
-  This class provides the interface for the L-BFGS optimizer.
-  Please overwrite the loss() and loss_gradient() function (see below) in derived loss classes.
-  """
+class LossFunction(object):
+    """This is a base class for all loss functions implemented in pure python.
+    It is simply a python re-implementation of the :py:class:`bob.learn.boosting.LossFunction` class.
 
-  def loss(self, targets, scores):
-    """This function is to compute the loss for the given targets and scores.
-
-    Keyword parameters:
+    This class provides the interface for the L-BFGS optimizer.
+    Please overwrite the loss() and loss_gradient() function (see below) in derived loss classes.
+    """
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+    def __init__(self):
+        pass
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+    def loss(self, targets, scores):
+        """This function is to compute the loss for the given targets and scores.
 
-    Returns
-      (float <#samples, #outputs>) or (float <#samples, 1>): The loss based on the given scores and targets.
-      Depending on the intended task, one of the two output variants should be chosen.
-      For classification tasks, please use the former way (#samples, #outputs), while for regression tasks, use the latter (#samples, 1).
-    """
-    raise NotImplementedError("This is a pure abstract function. Please implement that in your derived class.")
+        Keyword parameters:
 
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
-  def loss_gradient(self, targets, scores):
-    """This function is to compute the gradient of the loss for the given targets and scores.
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
-    Keyword parameters:
+        Returns
+          (float <#samples, #outputs>) or (float <#samples, 1>): The loss based on the given scores and targets.
+          Depending on the intended task, one of the two output variants should be chosen.
+          For classification tasks, please use the former way (#samples, #outputs), while for regression tasks, use the latter (#samples, 1).
+        """
+        raise NotImplementedError("This is a pure abstract function. Please implement that in your derived class.")
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+    def loss_gradient(self, targets, scores):
+        """This function is to compute the gradient of the loss for the given targets and scores.
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+        Keyword parameters:
 
-    Returns
-      loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
-    """
-    raise NotImplementedError("This is a pure abstract function. Please implement that in your derived class.")
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
-  def loss_sum(self, alpha, targets, previous_scores, current_scores):
-    """The function computes the sum of the loss which is used to find the optimized values of alpha (x).
+        Returns
+          loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
+        """
+        raise NotImplementedError("This is a pure abstract function. Please implement that in your derived class.")
 
-    The functions computes sum of loss values which is required during the line search step for the optimization of the alpha.
-    This function is given as the input for the L-BFGS optimization function.
+    def loss_sum(self, alpha, targets, previous_scores, current_scores):
+        """The function computes the sum of the loss which is used to find the optimized values of alpha (x).
 
-    Keyword parameters:
+        The function computes the sum of the loss values, which is required during the line search step for the optimization of alpha.
+        This function is given as the input for the L-BFGS optimization function.
 
-      alpha (float): The current value of the alpha.
+        Keyword parameters:
 
-      targets (float <#samples, #outputs>): The targets for the samples
+          alpha (float): The current value of the alpha.
 
-      previous_scores (float <#samples, #outputs>): The cumulative prediction scores of the samples until the previous round of the boosting.
+          targets (float <#samples, #outputs>): The targets for the samples
 
-      current_scores (float <#samples, #outputs>): The prediction scores of the samples for the current round of the boosting.
+          previous_scores (float <#samples, #outputs>): The cumulative prediction scores of the samples until the previous round of the boosting.
 
-    Returns
+          current_scores (float <#samples, #outputs>): The prediction scores of the samples for the current round of the boosting.
 
-      (float <#outputs>) The sum of the loss values for the current value of the alpha
-    """
+        Returns
 
-    # compute the scores and loss for the current alpha
-    scores = previous_scores + alpha * current_scores
-    losses = self.loss(targets, scores)
+          (float <#outputs>) The sum of the loss values for the current value of the alpha
+        """
 
-    # compute the sum of the loss
-    return numpy.sum(losses, 0)
+        # compute the scores and loss for the current alpha
+        scores = previous_scores + alpha * current_scores
+        losses = self.loss(targets, scores)
 
+        # compute the sum of the loss
+        return numpy.sum(losses, 0)
 
-  def loss_gradient_sum(self, alpha, targets, previous_scores, current_scores):
-    """The function computes the gradient as the sum of the derivatives per sample which is used to find the optimized values of alpha.
+    def loss_gradient_sum(self, alpha, targets, previous_scores, current_scores):
+        """The function computes the gradient as the sum of the derivatives per sample which is used to find the optimized values of alpha.
 
-    The functions computes sum of loss values which is required during the line search step for the optimization of the alpha.
-    This function is given as the input for the L-BFGS optimization function.
+        The function computes the sum of the loss gradient values, which is required during the line search step for the optimization of alpha.
+        This function is given as the input for the L-BFGS optimization function.
 
-    Keyword parameters:
+        Keyword parameters:
 
-      alpha (float): The current value of the alpha.
+          alpha (float): The current value of the alpha.
 
-      targets (float <#samples, #outputs>): The targets for the samples
+          targets (float <#samples, #outputs>): The targets for the samples
 
-      previous_scores (float <#samples, #outputs>): The cumulative prediction scores of the samples until the previous round of the boosting.
+          previous_scores (float <#samples, #outputs>): The cumulative prediction scores of the samples until the previous round of the boosting.
 
-      current_scores (float <#samples, #outputs>): The prediction scores of the samples for the current round of the boosting.
+          current_scores (float <#samples, #outputs>): The prediction scores of the samples for the current round of the boosting.
 
-    Returns
-      (float <#outputs>) The sum of the loss gradient for the current value of the alpha.
-    """
+        Returns
+          (float <#outputs>) The sum of the loss gradient for the current value of the alpha.
+        """
 
-    # compute the loss gradient for the updated score
-    scores = previous_scores + alpha * current_scores
-    loss_gradients = self.loss_gradient(targets, scores)
+        # compute the loss gradient for the updated score
+        scores = previous_scores + alpha * current_scores
+        loss_gradients = self.loss_gradient(targets, scores)
 
-    # take the sum of the loss gradient values
-    return numpy.sum(loss_gradients * current_scores, 0)
+        # take the sum of the loss gradient values
+        return numpy.sum(loss_gradients * current_scores, 0)
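
The docstrings above describe loss_sum() and loss_gradient_sum() as the callbacks handed to the L-BFGS optimizer during the line search over alpha. A sketch of that wiring with SciPy's fmin_l_bfgs_b, assuming a concrete loss such as LogitLoss (the actual Boosting trainer may invoke it with different arguments):

    import numpy
    from scipy.optimize import fmin_l_bfgs_b
    from bob.learn.boosting import LogitLoss

    loss = LogitLoss()
    targets         = numpy.array([[+1.], [-1.], [+1.]])
    previous_scores = numpy.zeros_like(targets)             # strong-machine scores so far
    current_scores  = numpy.array([[0.9], [0.4], [-0.2]])   # weak-machine outputs this round

    # line search: find the alpha that minimizes the summed loss along current_scores;
    # note that loss_sum returns one value per output dimension
    alpha, min_loss, info = fmin_l_bfgs_b(
        func=loss.loss_sum,
        x0=numpy.zeros(targets.shape[1]),
        fprime=loss.loss_gradient_sum,
        args=(targets, previous_scores, current_scores))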
diff --git a/bob/learn/boosting/TangentialLoss.py b/bob/learn/boosting/TangentialLoss.py
index e33a4efeddcdbafd5d230b83d0cea7e8e63f122a..604b8a0d14e6089dbac55aaf33346ebe42469bcc 100644
--- a/bob/learn/boosting/TangentialLoss.py
+++ b/bob/learn/boosting/TangentialLoss.py
@@ -1,38 +1,38 @@
-from .LossFunction import LossFunction
+from . import LossFunction
 
 import numpy
 
-class TangentialLoss (LossFunction):
-  """Tangent loss function, as described in http://www.svcl.ucsd.edu/projects/LossDesign/TangentBoost.html."""
 
-  def loss(self, targets, scores):
-    """The function computes the logit loss values using prediction scores and targets.
+class TangentialLoss(LossFunction):
+    """Tangent loss function, as described in http://www.svcl.ucsd.edu/projects/LossDesign/TangentBoost.html."""
 
-    Keyword parameters:
+    def loss(self, targets, scores):
+        """The function computes the logit loss values using prediction scores and targets.
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+        Keyword parameters:
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
-    Returns
-      (float <#samples, #outputs>): The loss values for the samples, always >= 0
-    """
-    return (2. * numpy.arctan(targets * scores) - 1.)**2
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
-  def loss_gradient(self, targets, scores):
-    """The function computes the gradient of the tangential loss function using prediction scores and targets.
+        Returns
+          (float <#samples, #outputs>): The loss values for the samples, always >= 0
+        """
+        return (2. * numpy.arctan(targets * scores) - 1.) ** 2
 
-    Keyword parameters:
+    def loss_gradient(self, targets, scores):
+        """The function computes the gradient of the tangential loss function using prediction scores and targets.
 
-      targets (float <#samples, #outputs>): The target values that should be reached.
+        Keyword parameters:
 
-      scores (float <#samples, #outputs>): The scores provided by the classifier.
+          targets (float <#samples, #outputs>): The target values that should be reached.
 
-    Returns
-      loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
-    """
-    m = targets * scores
-    numer = 4. * (2. * numpy.arctan(m) - 1.)
-    denom = 1. + m**2
-    return numer/denom
+          scores (float <#samples, #outputs>): The scores provided by the classifier.
 
+        Returns
+          loss (float <#samples, #outputs>): The gradient of the loss based on the given scores and targets.
+        """
+        m = targets * scores
+        numer = 4. * (2. * numpy.arctan(m) - 1.)
+        denom = 1. + m ** 2
+        return numer / denom
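
One property worth noting about the formula above: because arctan saturates at plus/minus pi/2, the tangent loss stays bounded (it approaches (pi + 1)**2, roughly 17.1, for badly misclassified samples) instead of growing without bound like the exponential loss. A small illustration (not part of the patch):

    import numpy
    from bob.learn.boosting import TangentialLoss

    loss = TangentialLoss()
    target = numpy.ones((1, 1))
    for score in (0., -1., -10., -1000.):
        # values approach (pi + 1)**2 ~ 17.15 instead of exploding
        print(score, loss.loss(target, numpy.array([[score]])))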
diff --git a/bob/learn/boosting/__init__.py b/bob/learn/boosting/__init__.py
index 11e2affb3086db96492bbe8c109612941040ff23..d25303069623f40ed230e92de5d347e96ac7f705 100644
--- a/bob/learn/boosting/__init__.py
+++ b/bob/learn/boosting/__init__.py
@@ -11,7 +11,7 @@ from bob.learn.boosting.version import module as __version__
 from bob.learn.boosting.version import api as __api_version__
 
 # include loss functions
-from bob.learn.boosting import LossFunction # Just to get the documentation for it
+from bob.learn.boosting.LossFunction import LossFunction # makes the class available in the package namespace; the loss modules import it from here
 from bob.learn.boosting.ExponentialLoss import ExponentialLoss
 from bob.learn.boosting.LogitLoss import LogitLoss
 from bob.learn.boosting.TangentialLoss import TangentialLoss
@@ -34,5 +34,19 @@ def get_config():
   return bob.extension.get_config(__name__, version.externals, version.api)
 
 
+# gets sphinx autodoc done right - don't remove it
+def __appropriate__(*args):
+  """Says object was actually declared here, an not on the import module.
+  Parameters:
+    *args: An iterable of objects to modify
+  Resolves `Sphinx referencing issues
+  <https://github.com/sphinx-doc/sphinx/issues/3048>`
+  """
+  for obj in args: obj.__module__ = __name__
+__appropriate__(
+    LossFunction,
+    )
+
 # gets sphinx autodoc done right - don't remove it
 __all__ = [_ for _ in dir() if not _.startswith('_')]
+
diff --git a/bob/learn/boosting/boosted_machine.cpp b/bob/learn/boosting/boosted_machine.cpp
index 8c0643905a7f0bbc91bc11ab49770554f5c3beb1..b1bd7730be613716dbd93ec2a879dc176224d659 100644
--- a/bob/learn/boosting/boosted_machine.cpp
+++ b/bob/learn/boosting/boosted_machine.cpp
@@ -245,7 +245,7 @@ static PyObject* boostedMachine_add(
 static auto boostedMachine_forward_doc = bob::extension::FunctionDoc(
   "forward",
   "Returns the prediction for the given feature vector(s)",
-  ".. note:: The :py:func:`__call__` function is an alias for this function.\n\n"
+  ".. note:: The ``__call__`` function is an alias for this function.\n\n"
   "This function can be called in six different ways:\n\n"
   "1. ``(uint16 <#inputs>)`` will compute and return the uni-variate prediction for a single feature vector.\n"
   "2. ``(uint16 <#samples,#inputs>, float <#samples>)`` will compute the uni-variate prediction for several feature vectors.\n"
diff --git a/bob/learn/boosting/lut_machine.cpp b/bob/learn/boosting/lut_machine.cpp
index beef85a7c2d7bfa2526ecd9e478f864a0e0afa15..f097b27133eb33fbbcae06ef444a55253cd04dbe 100644
--- a/bob/learn/boosting/lut_machine.cpp
+++ b/bob/learn/boosting/lut_machine.cpp
@@ -157,7 +157,7 @@ static PyObject* lutMachine_lut(
 static auto lutMachine_forward_doc = bob::extension::FunctionDoc(
   "forward",
   "Returns the prediction for the given feature vector(s)",
-  ".. note:: The :py:func:`__call__` function is an alias for this function.\n\n"
+  ".. note:: The ``__call__`` function is an alias for this function.\n\n"
   "This function can be called in four different ways:\n\n"
   "1. ``(uint16 <#inputs>)`` will compute and return the uni-variate prediction for a single feature vector.\n"
   "2. ``(uint16 <#samples,#inputs>, float <#samples>)`` will compute the uni-variate prediction for several feature vectors.\n"
diff --git a/bob/learn/boosting/stump_machine.cpp b/bob/learn/boosting/stump_machine.cpp
index 5388f6ab5e82060cc0d26bbaeabc64e9fdf40d9e..7a4a680e019041acf8f9c2851bf213fb10597abb 100644
--- a/bob/learn/boosting/stump_machine.cpp
+++ b/bob/learn/boosting/stump_machine.cpp
@@ -129,7 +129,7 @@ static PyObject* stumpMachine_polarity(
 static auto stumpMachine_forward_doc = bob::extension::FunctionDoc(
   "forward",
   "Returns the prediction for the given feature vector(s)",
-  ".. note:: The :py:func:`__call__` function is an alias for this function.\n\n"
+  ".. note:: The ``__call__`` function is an alias for this function.\n\n"
   ".. todo:: write more detailed documentation",
   true
 )
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 1d926d149dd57e304cadc70f11bdf564e4378703..4b30be72cf9f0dc051d21eb46181ef8b309f948d 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -26,7 +26,7 @@ Available trainers in :py:mod:`bob.learn.boosting` are:
 
 * :py:class:`bob.learn.boosting.Boosting` : Trains a strong machine of type :py:class:`bob.learn.boosting.BoostedMachine`.
 * :py:class:`bob.learn.boosting.LUTTrainer` : Trains a weak machine of type :py:class:`bob.learn.boosting.LUTMachine`.
-* :py:class:`bob.learn.boosting.StrumTrainer` : Trains a weak machine of type :py:class:`bob.learn.boosting.StumpMachine`.
+* :py:class:`bob.learn.boosting.StumpTrainer` : Trains a weak machine of type :py:class:`bob.learn.boosting.StumpMachine`.
 
 
 Loss functions
@@ -40,9 +40,9 @@ A base class loss function :py:class:`bob.learn.boosting.LossFunction` is called
   Not all combinations of loss functions and weak trainers make sense.
   Here is a list of useful combinations:
 
-  1. :py:class:`bob.learn.boosting.ExponentialLoss` with :py:class:`bob.learn.boosting.StrumTrainer` (uni-variate classification only).
-  2. :py:class:`bob.learn.boosting.LogitLoss` with :py:class:`bob.learn.boosting.StrumTrainer` or :py:class:`bob.learn.boosting.LUTTrainer` (uni-variate or multi-variate classification).
-  3. :py:class:`bob.learn.boosting.TangentialLoss` with :py:class:`bob.learn.boosting.StrumTrainer` or :py:class:`bob.learn.boosting.LUTTrainer` (uni-variate or multi-variate classification).
+  1. :py:class:`bob.learn.boosting.ExponentialLoss` with :py:class:`bob.learn.boosting.StumpTrainer` (uni-variate classification only).
+  2. :py:class:`bob.learn.boosting.LogitLoss` with :py:class:`bob.learn.boosting.StumpTrainer` or :py:class:`bob.learn.boosting.LUTTrainer` (uni-variate or multi-variate classification).
+  3. :py:class:`bob.learn.boosting.TangentialLoss` with :py:class:`bob.learn.boosting.StumpTrainer` or :py:class:`bob.learn.boosting.LUTTrainer` (uni-variate or multi-variate classification).
   4. :py:class:`bob.learn.boosting.JesorskyLoss` with :py:class:`bob.learn.boosting.LUTTrainer` (multi-variate regression only).
 
 Details