diff --git a/doc/evaluation.rst b/doc/evaluation.rst
index 0368ad6ddd23ab5c231a0e3c29fe8f88f9c0e51b..1f503da7b30bd917c631c3ebe3362b0b3b1deb25 100644
--- a/doc/evaluation.rst
+++ b/doc/evaluation.rst
@@ -16,12 +16,12 @@ Inference
 ---------
 
 You may use one of your trained models (or :ref:`one of ours
-<bob.ip.binseg.models>` to run inference on existing datasets or your own
-dataset.  In inference (or prediction) mode, we input data, the trained model,
-and output HDF5 files containing the prediction outputs for every input image.
-Each HDF5 file contains a single object with a 2-dimensional matrix of floating
-point numbers indicating the vessel probability (``[0.0,1.0]``) for each pixel
-in the input image.
+<bob.ip.binseg.results.baselines>`) to run inference on existing datasets or
+your own dataset.  In inference (or prediction) mode, we input data, the
+trained model, and output HDF5 files containing the prediction outputs for
+every input image.  Each HDF5 file contains a single object with a
+2-dimensional matrix of floating point numbers indicating the vessel
+probability (``[0.0,1.0]``) for each pixel in the input image.
 
 
 Inference on an existing dataset
@@ -38,7 +38,7 @@ To run inference, use the sub-command :ref:`predict
 Replace ``<model>`` and ``<dataset>`` by the appropriate :ref:`configuration
 files <bob.ip.binseg.configs>`.  Replace ``<path/to/model.pth>`` to a path
 leading to the pre-trained model, or URL pointing to a pre-trained model (e.g.
-:ref:`one of ours <bob.ip.binseg.models>`).
+:ref:`one of ours <bob.ip.binseg.results.baselines>`).
 
 
 Inference on a custom dataset
diff --git a/doc/links.rst b/doc/links.rst
index ce2c72d68eb7f9a94b4704b39e30cbb3da111552..8a9234e4c07161768ae38cea20552079cb0eea08 100644
--- a/doc/links.rst
+++ b/doc/links.rst
@@ -27,27 +27,47 @@
 
 .. Pretrained models
 
-.. _baselines_driu_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/drive/model.pth
-.. _baselines_hed_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/drive/model.pth
-.. _baselines_m2unet_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/drive/model.pth
-.. _baselines_unet_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/drive/model.pth
-.. _baselines_driu_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/stare/model.pth
-.. _baselines_hed_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/stare/model.pth
-.. _baselines_m2unet_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/stare/model.pth
-.. _baselines_unet_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/stare/model.pth
-.. _baselines_driu_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/chasedb1/model.pth
-.. _baselines_hed_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/chasedb1/model.pth
-.. _baselines_m2unet_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/chasedb1/model.pth
-.. _baselines_unet_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/chasedb1/model.pth
-.. _baselines_driu_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/hrf/model.pth
-.. _baselines_hed_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/hrf/model.pth
-.. _baselines_m2unet_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/hrf/model.pth
-.. _baselines_unet_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/hrf/model.pth
-.. _baselines_driu_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/iostar-vessel/model.pth
-.. _baselines_hed_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/iostar-vessel/model.pth
-.. _baselines_m2unet_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/iostar-vessel/model.pth
-.. _baselines_unet_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/iostar-vessel/model.pth
+.. _baselines_driu_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu-drive-1947d9fa.pth
+.. _baselines_hed_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed-drive-c8b86082.pth
+.. _baselines_m2unet_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet-drive-ce4c7a53.pth
+.. _baselines_unet_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet-drive-0ac99e2e.pth
+.. _baselines_driu_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu-stare-79dec93a.pth
+.. _baselines_hed_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed-stare-fcdb7671.pth
+.. _baselines_m2unet_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet-stare-952778c2.pth
+.. _baselines_unet_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet-stare-49b6a6d0.pth
+.. _baselines_driu_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu-chasedb1-e7cf53c3.pth
+.. _baselines_hed_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed-chasedb1-55ec6d34.pth
+.. _baselines_m2unet_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet-chasedb1-0becbf29.pth
+.. _baselines_unet_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet-chasedb1-be41b5a5.pth
+.. _baselines_driu_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu-hrf-c9e6a889.pth
+.. _baselines_hed_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed-hrf-3f4ab1c4.pth
+.. _baselines_m2unet_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet-hrf-2c3f2485.pth
+.. _baselines_unet_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet-hrf-9a559821.pth
+.. _baselines_driu_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu-iostar-vessel-ef8cc27b.pth
+.. _baselines_hed_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed-iostar-vessel-37cfaee1.pth
+.. _baselines_m2unet_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet-iostar-vessel-223b61ef.pth
+.. _baselines_unet_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet-iostar-vessel-86c78e87.pth
 
+.. _covd_driu_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/driu/drive/model.pth
+.. _covd_hed_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/hed/drive/model.pth
+.. _covd_m2unet_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/m2unet/drive/model.pth
+.. _covd_unet_drive: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/unet/drive/model.pth
+.. _covd_driu_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/driu/stare/model.pth
+.. _covd_hed_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/hed/stare/model.pth
+.. _covd_m2unet_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/m2unet/stare/model.pth
+.. _covd_unet_stare: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/unet/stare/model.pth
+.. _covd_driu_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/driu/chasedb1/model.pth
+.. _covd_hed_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/hed/chasedb1/model.pth
+.. _covd_m2unet_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/m2unet/chasedb1/model.pth
+.. _covd_unet_chase: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/unet/chasedb1/model.pth
+.. _covd_driu_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/driu/hrf/model.pth
+.. _covd_hed_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/hed/hrf/model.pth
+.. _covd_m2unet_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/m2unet/hrf/model.pth
+.. _covd_unet_hrf: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/unet/hrf/model.pth
+.. _covd_driu_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/driu/iostar-vessel/model.pth
+.. _covd_hed_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/hed/iostar-vessel/model.pth
+.. _covd_m2unet_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/m2unet/iostar-vessel/model.pth
+.. _covd_unet_iostar: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/covd/unet/iostar-vessel/model.pth
 
 .. DRIVE
 .. _driu_drive.pth: https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/DRIU_DRIVE.pth
diff --git a/doc/models.rst b/doc/models.rst
deleted file mode 100644
index 168b2414ad72cc730220315ca08ef6cb0a38411a..0000000000000000000000000000000000000000
--- a/doc/models.rst
+++ /dev/null
@@ -1,65 +0,0 @@
-.. -*- coding: utf-8 -*-
-
-.. _bob.ip.binseg.models:
-
-===================
- Pretrained Models
-===================
-
-We offer the following pre-trained models allowing inference and score
-reproduction of our results.  Due to storage limitations we only provide
-weights of a subset of all evaluated models.
-
-
-.. list-table::
-
-   * - **Datasets / Models**
-     - :py:mod:`driu <bob.ip.binseg.configs.models.driu>`
-     - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>`
-   * - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>`
-     - driu_drive.pth_
-     - m2unet_drive.pth_
-   * - :py:mod:`drive-drive <bob.ip.binseg.configs.datasets.drive.covd>`
-     -
-     - m2unet_covd-drive.pth_
-   * - :py:mod:`drive-ssl <bob.ip.binseg.configs.datasets.drive.ssl>`
-     -
-     - m2unet_covd-drive_ssl.pth_
-   * - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>`
-     - driu_stare.pth_
-     - m2unet_stare.pth_
-   * - :py:mod:`stare-covd <bob.ip.binseg.configs.datasets.stare.covd>`
-     -
-     - m2unet_covd-stare.pth_
-   * - :py:mod:`stare-ssl <bob.ip.binseg.configs.datasets.stare.ssl>`
-     -
-     - m2unet_covd-stare_ssl.pth_
-   * - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>`
-     - driu_chasedb1.pth_
-     - m2unet_chasedb1.pth_
-   * - :py:mod:`chasedb1-covd <bob.ip.binseg.configs.datasets.chasedb1.covd>`
-     -
-     - m2unet_covd-chasedb1.pth_
-   * - :py:mod:`chasedb1-ssl <bob.ip.binseg.configs.datasets.chasedb1.ssl>`
-     -
-     - m2unet_covd-chasedb1_ssl.pth_
-   * - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>`
-     - driu_iostar.pth_
-     - m2unet_iostar.pth_
-   * - :py:mod:`iostar-vessel-covd <bob.ip.binseg.configs.datasets.iostar.covd>`
-     -
-     - m2unet_covd-iostar.pth_
-   * - :py:mod:`iostar-vessel-ssl <bob.ip.binseg.configs.datasets.iostar.ssl>`
-     -
-     - m2unet_covd-iostar_ssl.pth_
-   * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>`
-     - driu_hrf.pth_
-     - m2unet_hrf.pth_
-   * - :py:mod:`hrf-covd <bob.ip.binseg.configs.datasets.hrf.covd>`
-     -
-     - m2unet_covd-hrf.pth_
-   * - :py:mod:`hrf-ssl <bob.ip.binseg.configs.datasets.hrf.ssl>`
-     -
-     - m2unet_covd-hrf_ssl.pth_
-
-.. include:: links.rst
diff --git a/doc/results/baselines/chasedb1.pdf b/doc/results/baselines/chasedb1.pdf
index 1d5924019d99f4ed1205199113174b43d86bcdc6..d9418224cffb8725c569e268885f1dc559407e4b 100644
Binary files a/doc/results/baselines/chasedb1.pdf and b/doc/results/baselines/chasedb1.pdf differ
diff --git a/doc/results/baselines/chasedb1.png b/doc/results/baselines/chasedb1.png
index ea3268bfa2054109a3e92e3c0dae422842f0c6b5..ae1ebe8b05017c3ca49d662830252c612d9f16e4 100644
Binary files a/doc/results/baselines/chasedb1.png and b/doc/results/baselines/chasedb1.png differ
diff --git a/doc/results/baselines/drive.pdf b/doc/results/baselines/drive.pdf
index f9f4ccefa1ba43215a01d1739624b158654111ee..d54253ef754323baf7ebe156ce5016d72e343489 100644
Binary files a/doc/results/baselines/drive.pdf and b/doc/results/baselines/drive.pdf differ
diff --git a/doc/results/baselines/drive.png b/doc/results/baselines/drive.png
index 438a94e8b59b687bdcbce4a460d24cd6e84a1f36..d0247edfd89325fd028aa67c6328ad7f269c57b4 100644
Binary files a/doc/results/baselines/drive.png and b/doc/results/baselines/drive.png differ
diff --git a/doc/results/baselines/hrf-fullres.pdf b/doc/results/baselines/hrf-fullres.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..a3473f2f3a685fa27eed05d9c7c8c23b517358ea
Binary files /dev/null and b/doc/results/baselines/hrf-fullres.pdf differ
diff --git a/doc/results/baselines/hrf-fullres.png b/doc/results/baselines/hrf-fullres.png
new file mode 100644
index 0000000000000000000000000000000000000000..f33a948e00f2bcf79be8afcb8b32bdfbc4775ae8
Binary files /dev/null and b/doc/results/baselines/hrf-fullres.png differ
diff --git a/doc/results/baselines/hrf.pdf b/doc/results/baselines/hrf.pdf
index 59ba2ddfee9e5451ad9218b80d4eadeb81fdb6c3..a8e5d7342338aa09971efb47aa801d268c49cc9e 100644
Binary files a/doc/results/baselines/hrf.pdf and b/doc/results/baselines/hrf.pdf differ
diff --git a/doc/results/baselines/hrf.png b/doc/results/baselines/hrf.png
index b78f3e4d1e573e51565df357448e74613ac6a381..b1e006be7647212abd6fb53c0c213d097fc416a3 100644
Binary files a/doc/results/baselines/hrf.png and b/doc/results/baselines/hrf.png differ
diff --git a/doc/results/baselines/index.rst b/doc/results/baselines/index.rst
index 76a9d3b2e303b0a27ad8f03af6a2c40c29960388..52958862620feda24741880fcd891b533d5566d0 100644
--- a/doc/results/baselines/index.rst
+++ b/doc/results/baselines/index.rst
@@ -11,9 +11,13 @@ F1 Scores (micro-level)
 
 * Benchmark results for models: DRIU, HED, M2U-Net and U-Net.
 * Models are trained and tested on the same dataset (**numbers in bold**
-  indicate number of parameters per model).  Models are trained for a fixed
-  number of 1000 epochs, with a learning rate of 0.001 until epoch 900 and then
-  0.0001 until the end of the training.
+  indicate approximate number of parameters per model).  Models are trained for
+  a fixed number of 1000 epochs, with a learning rate of 0.001 until epoch 900
+  and then 0.0001 until the end of the training.
+* During the training session, an unaugmented copy of the training set is used
+  as validation set.  We keep checkpoints for the best performing networks
+  based on such validation set.  The best performing network during training is
+  used for evaluation.
 * Database and model resource configuration links (table top row and left
   column) are linked to the originating configuration files used to obtain
   these results.
@@ -26,6 +30,9 @@ F1 Scores (micro-level)
   analyze`` providing the model URL as ``--weight`` parameter.
 * For comparison purposes, we provide "second-annotator" performances on the
   same test set, where available.
+* :ref:`Our baseline script <bob.ip.binseg.baseline-script>` was used to
+  generate the results displayed here.
+* HRF models were trained using half the full resolution (1168x1648).
 
 
 .. list-table::
@@ -45,35 +52,40 @@ F1 Scores (micro-level)
      - 25.8M
    * - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>`
      - 0.788  (0.021)
-     - `0.819 (0.017) <baselines_driu_drive_>`_
-     - `0.806 (0.017) <baselines_hed_drive_>`_
-     - `0.803 (0.017) <baselines_m2unet_drive_>`_
-     - `0.823 (0.016) <baselines_unet_drive_>`_
+     - `0.821 (0.014) <baselines_driu_drive_>`_
+     - `0.813 (0.016) <baselines_hed_drive_>`_
+     - `0.802 (0.014) <baselines_m2unet_drive_>`_
+     - `0.825 (0.015) <baselines_unet_drive_>`_
    * - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>`
      - 0.759 (0.028)
-     - `0.822 (0.037) <baselines_driu_stare_>`_
-     - `0.808 (0.046) <baselines_hed_stare_>`_
-     - `0.811 (0.039) <baselines_m2unet_stare_>`_
-     - `0.827 (0.041) <baselines_unet_stare_>`_
+     - `0.828 (0.039) <baselines_driu_stare_>`_
+     - `0.815 (0.047) <baselines_hed_stare_>`_
+     - `0.818 (0.035) <baselines_m2unet_stare_>`_
+     - `0.828 (0.050) <baselines_unet_stare_>`_
    * - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>`
      - 0.768 (0.023)
-     - `0.810 (0.017) <baselines_driu_chase_>`_
-     - `0.806 (0.021) <baselines_hed_chase_>`_
-     - `0.798 (0.017) <baselines_m2unet_chase_>`_
-     - `0.803 (0.015) <baselines_unet_chase_>`_
-   * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>`
+     - `0.812 (0.018) <baselines_driu_chase_>`_
+     - `0.806 (0.020) <baselines_hed_chase_>`_
+     - `0.798 (0.018) <baselines_m2unet_chase_>`_
+     - `0.807 (0.017) <baselines_unet_chase_>`_
+   * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` (1168x1648)
      -
-     - `0.802 (0.039) <baselines_driu_hrf_>`_
-     - `0.793 (0.041) <baselines_hed_hrf_>`_
-     - `0.785 (0.041) <baselines_m2unet_hrf_>`_
-     - `0.797 (0.038) <baselines_unet_hrf_>`_
+     - `0.808 (0.038) <baselines_driu_hrf_>`_
+     - `0.803 (0.040) <baselines_hed_hrf_>`_
+     - `0.796 (0.048) <baselines_m2unet_hrf_>`_
+     - `0.811 (0.039) <baselines_unet_hrf_>`_
+   * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` (2336x3296)
+     -
+     - `0.722 (0.073) <baselines_driu_hrf_>`_
+     - `0.703 (0.090) <baselines_hed_hrf_>`_
+     - `0.713 (0.143) <baselines_m2unet_hrf_>`_
+     - `0.756 (0.051) <baselines_unet_hrf_>`_
    * - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>`
      -
-     - `0.823 (0.021) <baselines_driu_iostar_>`_
-     - `0.821 (0.022) <baselines_hed_iostar_>`_
-     - `0.816 (0.021) <baselines_m2unet_iostar_>`_
-     - `0.818 (0.019) <baselines_unet_iostar_>`_
-
+     - `0.825 (0.020) <baselines_driu_iostar_>`_
+     - `0.827 (0.020) <baselines_hed_iostar_>`_
+     - `0.820 (0.018) <baselines_m2unet_iostar_>`_
+     - `0.818 (0.020) <baselines_unet_iostar_>`_
 
 Precision-Recall (PR) Curves
 ----------------------------
@@ -90,7 +102,7 @@ its average value across all test set images, for a fixed threshold set to
    versus Sensitivity) with respect to the overall shape.  You may have a look
    at [DAVIS-2006]_ for details on the relationship between PR and ROC curves.
    For example, PR curves are not guaranteed to be monotonically increasing or
-   decreasing with the scanned thresholds (e.g. see M2U-Net on STARE dataset).
+   decreasing with the scanned thresholds.
 
    Each evaluated threshold in a combination of trained models and datasets is
    represented by a point in each curve.  Points are linearly interpolated to
@@ -125,16 +137,21 @@ its average value across all test set images, for a fixed threshold set to
       - .. figure:: hrf.png
            :align: center
            :scale: 50%
-           :alt: Model comparisons for hrf datasets
+           :alt: Model comparisons for hrf datasets (matching training resolution: 1168x1648)
 
-           :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>`: PR curve and F1 scores at T=0.5 (:download:`pdf <hrf.pdf>`)
+           :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` (1168x1648): PR curve and F1 scores at T=0.5 (:download:`pdf <hrf.pdf>`)
     * - .. figure:: iostar-vessel.png
            :align: center
            :scale: 50%
            :alt: Model comparisons for iostar-vessel datasets
 
            :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>`: PR curve and F1 scores at T=0.5 (:download:`pdf <iostar-vessel.pdf>`)
-      -
+      - .. figure:: hrf-fullres.png
+           :align: center
+           :scale: 50%
+           :alt: Model comparisons for hrf datasets (double training resolution: 2336x3296)
+
+           :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` (2336x3296): PR curve and F1 scores at T=0.5 (:download:`pdf <hrf-fullres.pdf>`)
 
 
 Remarks
@@ -149,7 +166,9 @@ Remarks
 * Where second annotator labels exist, model performance and variability seems
   on par with such annotations.  One possible exception is for CHASE-DB1, where
   models show consistently less variability than the second annotator.
-  Unfortunately, this cannot be conclusive.
+  Unfortunately, this is not conclusive.
+* Training at half resolution for HRF shows a small loss in F1 score (about 7
+  to 12%, relative) when the full-resolution version is used as evaluation set.
 
 
 .. include:: ../../links.rst
diff --git a/doc/results/baselines/iostar-vessel.pdf b/doc/results/baselines/iostar-vessel.pdf
index 3edac941b638ec8f1e509168d7342a19c064a401..0c9f08d12f8a919dd0c381fafc4fc52fa0e50101 100644
Binary files a/doc/results/baselines/iostar-vessel.pdf and b/doc/results/baselines/iostar-vessel.pdf differ
diff --git a/doc/results/baselines/iostar-vessel.png b/doc/results/baselines/iostar-vessel.png
index 7d2df5f92909cc1a1d14b5d838bc4da887252680..a670c64c1e50c2f68b4f1ec95df85050d25887ab 100644
Binary files a/doc/results/baselines/iostar-vessel.png and b/doc/results/baselines/iostar-vessel.png differ
diff --git a/doc/results/baselines/stare.pdf b/doc/results/baselines/stare.pdf
index 2837debb0a0b7e833f86a0012d8ace4f25f4e1b7..641e6e2edd9da985ecf64067c9ddf97941a271ee 100644
Binary files a/doc/results/baselines/stare.pdf and b/doc/results/baselines/stare.pdf differ
diff --git a/doc/results/baselines/stare.png b/doc/results/baselines/stare.png
index 4b07bfdf2a327a941e37c34f78e9b2fb8847a0c7..76fe040196eaf04f64276f3fa5b0a5c2d2f4ac27 100644
Binary files a/doc/results/baselines/stare.png and b/doc/results/baselines/stare.png differ
diff --git a/doc/results/covd/index.rst b/doc/results/covd/index.rst
index 844d9b963871fd3afcf2495d260d6778a758b3f6..ad61be21bb40b34d922078c7bb341f7cd624b652 100644
--- a/doc/results/covd/index.rst
+++ b/doc/results/covd/index.rst
@@ -2,121 +2,80 @@
 
 .. _bob.ip.binseg.results.covd:
 
-.. todo::
+========================================
+ Combined Vessel Dataset (COVD) Results
+========================================
+
+
+F1 Scores (micro-level)
+-----------------------
+
+* Benchmark results for models: DRIU, HED, M2U-Net and U-Net.
+* Models are trained on a COVD **excluding** the target dataset, and tested on
+  the target dataset (**numbers in bold** indicate number of parameters per
+  model).  Models are trained for a fixed number of 1000 epochs, with a
+  learning rate of 0.001 until epoch 900 and then 0.0001 until the end of the
+  training.
+* Database and model resource configuration links (table top row and left
+  column) are linked to the originating configuration files used to obtain
+  these results.
+* Check `our paper`_ for details on the calculation of the F1 Score and standard
+  deviations (in parentheses).
+* Single performance numbers correspond to *a priori* performance indicators,
+  where the threshold is previously selected on the training set (COVD
+  excluding the target dataset).
+* You can cross check the analysis numbers provided in this table by
+  downloading this software package, the raw data, and running ``bob binseg
+  analyze`` providing the model URL as ``--weight`` parameter.
+* For comparison purposes, we provide "second-annotator" performances on the
+  same test set, where available.
 
-   This section is outdated and needs re-factoring.
-
-
-============================
- COVD- and COVD-SLL Results
-============================
-
-In addition to the M2U-Net architecture, we also evaluated the larger DRIU
-network and a variation of it that contains batch normalization (DRIU+BN) on
-COVD- (Combined Vessel Dataset from all training data minus target test set)
-and COVD-SSL (COVD- and Semi-Supervised Learning). Perhaps surprisingly, for
-the majority of combinations, the performance of the DRIU variants are roughly
-equal or worse to the ones obtained with the much smaller M2U-Net.  We
-anticipate that one reason for this could be overparameterization of large
-VGG-16 models that are pretrained on ImageNet.
-
-
-F1 Scores
----------
-
-Comparison of F1 Scores (micro-level and standard deviation) of DRIU and
-M2U-Net on COVD- and COVD-SSL.  Standard deviation across test-images in
-brackets.
 
 .. list-table::
-   :header-rows: 1
-
-   * - F1 score
-     - :py:mod:`DRIU <bob.ip.binseg.configs.models.driu>`/:py:mod:`DRIU@SSL <bob.ip.binseg.configs.models.driu_ssl>`
-     - :py:mod:`DRIU+BN <bob.ip.binseg.configs.models.driu_bn>`/:py:mod:`DRIU+BN@SSL <bob.ip.binseg.configs.models.driu_bn_ssl>`
-     - :py:mod:`M2U-Net <bob.ip.binseg.configs.models.m2unet>`/:py:mod:`M2U-Net@SSL <bob.ip.binseg.configs.models.m2unet_ssl>`
-   * - :py:mod:`COVD-DRIVE <bob.ip.binseg.configs.datasets.drive.covd>`
-     - 0.788 (0.018)
-     - 0.797 (0.019)
-     - `0.789 (0.018) <m2unet_covd-drive.pth>`_
-   * - :py:mod:`COVD-DRIVE+SSL <bob.ip.binseg.configs.datasets.drive.ssl>`
-     - 0.785 (0.018)
-     - 0.783 (0.019)
-     - `0.791 (0.014) <m2unet_covd-drive_ssl.pth>`_
-   * - :py:mod:`COVD-STARE <bob.ip.binseg.configs.datasets.stare.covd>`
-     - 0.778 (0.117)
-     - 0.778 (0.122)
-     - `0.812 (0.046) <m2unet_covd-stare.pth>`_
-   * - :py:mod:`COVD-STARE+SSL <bob.ip.binseg.configs.datasets.stare.ssl>`
-     - 0.788 (0.102)
-     - 0.811 (0.074)
-     - `0.820 (0.044) <m2unet_covd-stare_ssl.pth>`_
-   * - :py:mod:`COVD-CHASEDB1 <bob.ip.binseg.configs.datasets.chasedb1.covd>`
-     - 0.796 (0.027)
-     - 0.791 (0.025)
-     - `0.788 (0.024) <m2unet_covd-chasedb1.pth>`_
-   * - :py:mod:`COVD-CHASEDB1+SSL <bob.ip.binseg.configs.datasets.chasedb1.ssl>`
-     - 0.796 (0.024)
-     - 0.798 (0.025)
-     - `0.799 (0.026) <m2unet_covd-chasedb1_ssl.pth>`_
-   * - :py:mod:`COVD-HRF <bob.ip.binseg.configs.datasets.hrf.covd>`
-     - 0.799 (0.044)
-     - 0.800 (0.045)
-     - `0.802 (0.045) <m2unet_covd-hrf.pth>`_
-   * - :py:mod:`COVD-HRF+SSL <bob.ip.binseg.configs.datasets.hrf.ssl>`
-     - 0.799 (0.044)
-     - 0.784 (0.048)
-     - `0.797 (0.044) <m2unet_covd-hrf_ssl.pth>`_
-   * - :py:mod:`COVD-IOSTAR-VESSEL <bob.ip.binseg.configs.datasets.iostar.covd>`
-     - 0.791 (0.021)
-     - 0.777 (0.032)
-     - `0.793 (0.015) <m2unet_covd-iostar.pth>`_
-   * - :py:mod:`COVD-IOSTAR-VESSEL+SSL <bob.ip.binseg.configs.datasets.iostar.ssl>`
-     - 0.797 (0.017)
-     - 0.811 (0.074)
-     - `0.785 (0.018) <m2unet_covd-iostar_ssl.pth>`_
-
-
-M2U-Net Precision vs. Recall Curves
------------------------------------
-
-Precision vs. recall curves for each evaluated dataset.  Note that here the
-F1-score is calculated on a macro level (see paper for more details).
-
-.. figure:: pr_CHASEDB1.png
-   :scale: 50 %
-   :align: center
-   :alt: model comparisons
-
-   CHASE_DB1: Precision vs Recall curve and F1 scores
-
-.. figure:: pr_DRIVE.png
-   :scale: 50 %
-   :align: center
-   :alt: model comparisons
-
-   DRIVE: Precision vs Recall curve and F1 scores
-
-.. figure:: pr_HRF.png
-   :scale: 50 %
-   :align: center
-   :alt: model comparisons
-
-   HRF: Precision vs Recall curve and F1 scores
-
-.. figure:: pr_IOSTARVESSEL.png
-   :scale: 50 %
-   :align: center
-   :alt: model comparisons
-
-   IOSTAR: Precision vs Recall curve and F1 scores
-
-.. figure:: pr_STARE.png
-   :scale: 50 %
-   :align: center
-   :alt: model comparisons
-
-   STARE: Precision vs Recall curve and F1 scores
+   :header-rows: 2
+
+   * -
+     -
+     - :py:mod:`driu <bob.ip.binseg.configs.models.driu>`
+     - :py:mod:`hed <bob.ip.binseg.configs.models.hed>`
+     - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>`
+     - :py:mod:`unet <bob.ip.binseg.configs.models.unet>`
+   * - Dataset
+     - 2nd. Annot.
+     - 15M
+     - 14.7M
+     - 0.55M
+     - 25.8M
+   * - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.covd>`
+     - 0.788 (0.021)
+     - `0.768 (0.031) <covd_driu_drive_>`_
+     - `0.750 (0.036) <covd_hed_drive_>`_
+     - `0.771 (0.027) <covd_m2unet_drive_>`_
+     - `0.775 (0.029) <covd_unet_drive_>`_
+   * - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.covd>`
+     - 0.759 (0.028)
+     - `0.786 (0.100) <covd_driu_stare_>`_
+     - `0.738 (0.193) <covd_hed_stare_>`_
+     - `0.800 (0.080) <covd_m2unet_stare_>`_
+     - `0.806 (0.072) <covd_unet_stare_>`_
+   * - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.covd>`
+     - 0.768 (0.023)
+     - `0.778 (0.031) <covd_driu_chase_>`_
+     - `0.777 (0.028) <covd_hed_chase_>`_
+     - `0.776 (0.031) <covd_m2unet_chase_>`_
+     - `0.779 (0.028) <covd_unet_chase_>`_
+   * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.covd>`
+     -
+     - `0.742 (0.049) <covd_driu_hrf_>`_
+     - `0.719 (0.047) <covd_hed_hrf_>`_
+     - `0.735 (0.045) <covd_m2unet_hrf_>`_
+     - `0.746 (0.046) <covd_unet_hrf_>`_
+   * - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.covd>`
+     -
+     - `0.790 (0.023) <covd_driu_iostar_>`_
+     - `0.792 (0.020) <covd_hed_iostar_>`_
+     - `0.788 (0.021) <covd_m2unet_iostar_>`_
+     - `0.783 (0.019) <covd_unet_iostar_>`_
 
 
 .. include:: ../../links.rst
diff --git a/doc/results/index.rst b/doc/results/index.rst
index f2d7e2ac8de271a3745b1de154d0b848ef70806b..ed6e9f05910d40d7061447a707e290baca17affa 100644
--- a/doc/results/index.rst
+++ b/doc/results/index.rst
@@ -18,6 +18,7 @@ strategy.
    baselines/index
    xtest/index
    covd/index
+   old/index
 
 
 .. include:: ../links.rst
diff --git a/doc/results/old/index.rst b/doc/results/old/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5577a4dcba6ccab624823cee08301db6aa7ae2bf
--- /dev/null
+++ b/doc/results/old/index.rst
@@ -0,0 +1,122 @@
+.. -*- coding: utf-8 -*-
+
+.. _bob.ip.binseg.results.old:
+
+.. todo::
+
+   This section is outdated and needs re-factoring.
+
+
+============================
+ COVD- and COVD-SSL Results
+============================
+
+In addition to the M2U-Net architecture, we also evaluated the larger DRIU
+network and a variation of it that contains batch normalization (DRIU+BN) on
+COVD- (Combined Vessel Dataset from all training data minus target test set)
+and COVD-SSL (COVD- and Semi-Supervised Learning). Perhaps surprisingly, for
+the majority of combinations, the performance of the DRIU variants is roughly
+equal to or worse than that obtained with the much smaller M2U-Net.  We
+anticipate that one reason for this could be overparameterization of large
+VGG-16 models that are pretrained on ImageNet.
+
+
+F1 Scores
+---------
+
+Comparison of F1 scores (micro-level) of DRIU, DRIU+BN and M2U-Net on COVD-
+and COVD-SSL.  The standard deviation across test images is given in
+parentheses.
+
+.. list-table::
+   :header-rows: 1
+
+   * - F1 score
+     - :py:mod:`DRIU <bob.ip.binseg.configs.models.driu>`/:py:mod:`DRIU@SSL <bob.ip.binseg.configs.models.driu_ssl>`
+     - :py:mod:`DRIU+BN <bob.ip.binseg.configs.models.driu_bn>`/:py:mod:`DRIU+BN@SSL <bob.ip.binseg.configs.models.driu_bn_ssl>`
+     - :py:mod:`M2U-Net <bob.ip.binseg.configs.models.m2unet>`/:py:mod:`M2U-Net@SSL <bob.ip.binseg.configs.models.m2unet_ssl>`
+   * - :py:mod:`COVD-DRIVE <bob.ip.binseg.configs.datasets.drive.covd>`
+     - 0.788 (0.018)
+     - 0.797 (0.019)
+     - `0.789 (0.018) <m2unet_covd-drive.pth>`_
+   * - :py:mod:`COVD-DRIVE+SSL <bob.ip.binseg.configs.datasets.drive.ssl>`
+     - 0.785 (0.018)
+     - 0.783 (0.019)
+     - `0.791 (0.014) <m2unet_covd-drive_ssl.pth>`_
+   * - :py:mod:`COVD-STARE <bob.ip.binseg.configs.datasets.stare.covd>`
+     - 0.778 (0.117)
+     - 0.778 (0.122)
+     - `0.812 (0.046) <m2unet_covd-stare.pth>`_
+   * - :py:mod:`COVD-STARE+SSL <bob.ip.binseg.configs.datasets.stare.ssl>`
+     - 0.788 (0.102)
+     - 0.811 (0.074)
+     - `0.820 (0.044) <m2unet_covd-stare_ssl.pth>`_
+   * - :py:mod:`COVD-CHASEDB1 <bob.ip.binseg.configs.datasets.chasedb1.covd>`
+     - 0.796 (0.027)
+     - 0.791 (0.025)
+     - `0.788 (0.024) <m2unet_covd-chasedb1.pth>`_
+   * - :py:mod:`COVD-CHASEDB1+SSL <bob.ip.binseg.configs.datasets.chasedb1.ssl>`
+     - 0.796 (0.024)
+     - 0.798 (0.025)
+     - `0.799 (0.026) <m2unet_covd-chasedb1_ssl.pth>`_
+   * - :py:mod:`COVD-HRF <bob.ip.binseg.configs.datasets.hrf.covd>`
+     - 0.799 (0.044)
+     - 0.800 (0.045)
+     - `0.802 (0.045) <m2unet_covd-hrf.pth>`_
+   * - :py:mod:`COVD-HRF+SSL <bob.ip.binseg.configs.datasets.hrf.ssl>`
+     - 0.799 (0.044)
+     - 0.784 (0.048)
+     - `0.797 (0.044) <m2unet_covd-hrf_ssl.pth>`_
+   * - :py:mod:`COVD-IOSTAR-VESSEL <bob.ip.binseg.configs.datasets.iostar.covd>`
+     - 0.791 (0.021)
+     - 0.777 (0.032)
+     - `0.793 (0.015) <m2unet_covd-iostar.pth>`_
+   * - :py:mod:`COVD-IOSTAR-VESSEL+SSL <bob.ip.binseg.configs.datasets.iostar.ssl>`
+     - 0.797 (0.017)
+     - 0.811 (0.074)
+     - `0.785 (0.018) <m2unet_covd-iostar_ssl.pth>`_
+
+
+M2U-Net Precision vs. Recall Curves
+-----------------------------------
+
+Precision vs. recall curves for each evaluated dataset.  Note that here the
+F1-score is calculated on a macro level (see paper for more details).
+
+.. figure:: pr_CHASEDB1.png
+   :scale: 50 %
+   :align: center
+   :alt: model comparisons
+
+   CHASE_DB1: Precision vs Recall curve and F1 scores
+
+.. figure:: pr_DRIVE.png
+   :scale: 50 %
+   :align: center
+   :alt: model comparisons
+
+   DRIVE: Precision vs Recall curve and F1 scores
+
+.. figure:: pr_HRF.png
+   :scale: 50 %
+   :align: center
+   :alt: model comparisons
+
+   HRF: Precision vs Recall curve and F1 scores
+
+.. figure:: pr_IOSTARVESSEL.png
+   :scale: 50 %
+   :align: center
+   :alt: model comparisons
+
+   IOSTAR: Precision vs Recall curve and F1 scores
+
+.. figure:: pr_STARE.png
+   :scale: 50 %
+   :align: center
+   :alt: model comparisons
+
+   STARE: Precision vs Recall curve and F1 scores
+
+
+.. include:: ../../links.rst
diff --git a/doc/results/covd/pr_CHASEDB1.png b/doc/results/old/pr_CHASEDB1.png
similarity index 100%
rename from doc/results/covd/pr_CHASEDB1.png
rename to doc/results/old/pr_CHASEDB1.png
diff --git a/doc/results/covd/pr_DRIVE.png b/doc/results/old/pr_DRIVE.png
similarity index 100%
rename from doc/results/covd/pr_DRIVE.png
rename to doc/results/old/pr_DRIVE.png
diff --git a/doc/results/covd/pr_HRF.png b/doc/results/old/pr_HRF.png
similarity index 100%
rename from doc/results/covd/pr_HRF.png
rename to doc/results/old/pr_HRF.png
diff --git a/doc/results/covd/pr_IOSTARVESSEL.png b/doc/results/old/pr_IOSTARVESSEL.png
similarity index 100%
rename from doc/results/covd/pr_IOSTARVESSEL.png
rename to doc/results/old/pr_IOSTARVESSEL.png
diff --git a/doc/results/covd/pr_STARE.png b/doc/results/old/pr_STARE.png
similarity index 100%
rename from doc/results/covd/pr_STARE.png
rename to doc/results/old/pr_STARE.png
diff --git a/doc/scripts/baselines.sh b/doc/scripts/baselines.sh
index 703ccf331122ca62ed850e9d94972999f7e8785b..e604abb13b2b00d2ad03bdc2ce45469134582c4a 100755
--- a/doc/scripts/baselines.sh
+++ b/doc/scripts/baselines.sh
@@ -15,14 +15,22 @@ function run() {
     cmd+=("-vv" "--device=${device}" ${1} ${2})
     cmd+=("--batch-size=${3}" "--output-folder=${OUTDIR}/${1}/${2}")
 
-    [ $# -gt 4 ] && cmd=(jman submit "--name=$(basename ${OUTDIR})-${1}-${2}" "--memory=24G" "--queue=${5}" -- "${cmd[@]}")
+    mkdir -pv ${OUTDIR}/${1}/${2}
 
-    "${cmd[@]}"
+    [ $# -gt 4 ] && cmd=(jman submit "--log-dir=${OUTDIR}/${1}/${2}" "--name=$(basename ${OUTDIR})-${1}-${2}" "--memory=24G" "--queue=${5}" -- "${cmd[@]}")
+
+    if [ $# -le 4 ]; then
+        # executing locally, capture stdout and stderr
+        ("${cmd[@]}" | tee "${OUTDIR}/${1}/${2}/stdout.log") 3>&1 1>&2 2>&3 | tee "${OUTDIR}/${1}/${2}/stderr.log"
+    else
+        "${cmd[@]}"
+    fi
 }
 
+
 # run/submit all baselines
-# comment out from "sgpu/gpu" to run locally
 # comment out from "cuda:0" to run on CPU
+# comment out from "sgpu/gpu" to run locally
 run m2unet drive         16 #cuda:0 #sgpu
 run hed    drive          8 #cuda:0 #sgpu
 run driu   drive          8 #cuda:0 #sgpu
diff --git a/doc/usage.rst b/doc/usage.rst
index b63cd4378782167b0c91b194167f30da1add4360..bb03e127a1289bf841ad9ef422d6dc26e561b002 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -61,7 +61,6 @@ modifying one of our configuration resources.
 
    experiment
    training
-   models
    evaluation