diff --git a/.gitignore b/.gitignore
index 777eb99ff50db820cb3e6f4b9d21a4093112cc15..294c3e81810b240542b0e3e4eb9d83e466763675 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,6 @@ output
 .ipynb_checkpoints
 */.ipynb_checkpoints/*
 submitted.sql3
-logs/
-results/
+/logs/
+/results/
 .coverage
diff --git a/doc/results/baselines/chasedb1.pdf b/doc/results/baselines/chasedb1.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..3139798feaa762bc1c9a11e8a6716a5ae21e9a63
Binary files /dev/null and b/doc/results/baselines/chasedb1.pdf differ
diff --git a/doc/results/baselines/chasedb1.png b/doc/results/baselines/chasedb1.png
new file mode 100644
index 0000000000000000000000000000000000000000..0f760b2a097c2b42fa0e067a5a9920d0a525788c
Binary files /dev/null and b/doc/results/baselines/chasedb1.png differ
diff --git a/doc/results/baselines/drive.pdf b/doc/results/baselines/drive.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..3fba78c86bf8c0f20d8ac2612b87a67cdad0b20b
Binary files /dev/null and b/doc/results/baselines/drive.pdf differ
diff --git a/doc/results/baselines/drive.png b/doc/results/baselines/drive.png
new file mode 100644
index 0000000000000000000000000000000000000000..9ecae7f663dde499a0cd3999e0aeaecf17aab778
Binary files /dev/null and b/doc/results/baselines/drive.png differ
diff --git a/doc/results/baselines/hrf.pdf b/doc/results/baselines/hrf.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..03bd6092d55df80b9f35c04e23caec13828d80f3
Binary files /dev/null and b/doc/results/baselines/hrf.pdf differ
diff --git a/doc/results/baselines/hrf.png b/doc/results/baselines/hrf.png
new file mode 100644
index 0000000000000000000000000000000000000000..1608a3e7fdcab0bac4321b85945eb6766abb5827
Binary files /dev/null and b/doc/results/baselines/hrf.png differ
diff --git a/doc/results/baselines/iostar-vessel.pdf b/doc/results/baselines/iostar-vessel.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..141ea565273373311ca3960cdb994ba2f2c6f58c
Binary files /dev/null and b/doc/results/baselines/iostar-vessel.pdf differ
diff --git a/doc/results/baselines/iostar-vessel.png b/doc/results/baselines/iostar-vessel.png
new file mode 100644
index 0000000000000000000000000000000000000000..7da4802492e1eac053166080e5b19090c2499f59
Binary files /dev/null and b/doc/results/baselines/iostar-vessel.png differ
diff --git a/doc/results/baselines/stare.pdf b/doc/results/baselines/stare.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..8bafd654a22dac56458355e048ede26ce4ce7cd1
Binary files /dev/null and b/doc/results/baselines/stare.pdf differ
diff --git a/doc/results/baselines/stare.png b/doc/results/baselines/stare.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a632af2907d4c0f4d48d23072dcc97c189cf83f
Binary files /dev/null and b/doc/results/baselines/stare.png differ
diff --git a/doc/results/xtest/index.rst b/doc/results/xtest/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..37e5acb1dcd01afa7693c3300ea26d91a9f8f68d
--- /dev/null
+++ b/doc/results/xtest/index.rst
@@ -0,0 +1,90 @@
+.. -*- coding: utf-8 -*-
+
+.. _bob.ip.binseg.results.xtest:
+
+======================
+ Cross-Database Tests
+======================
+
+F1 Scores (micro-level)
+-----------------------
+
+* Benchmark results for models: DRIU and M2U-Net.
+* Models are trained and tested on the same dataset (numbers in parentheses
+  indicate the number of parameters per model), and then evaluated across the
+  test sets of other datasets.
+* You can cross-check the analysis numbers provided in this table by
+  downloading this software package, the raw data, and running
+  ``bob binseg analyze`` providing the model URL as the ``--weight`` parameter,
+  and then the ``-xtest`` resource variant of the dataset the model was
+  trained on.  For example, to run cross-evaluation tests for the DRIVE
+  dataset, use the configuration resource
+  :py:mod:`drive-xtest <bob.ip.binseg.configs.datasets.drive.xtest>`.
+  Otherwise, we also provide `CSV files
+  <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/xtest/>`_
+  with the estimated performance per threshold (100 steps) per subset.
+* For comparison purposes, we provide "second-annotator" performances on the
+  same test set, where available.
+* We only show results for DRIU (~15.4 million parameters) and M2U-Net (~550
+  thousand parameters) as these models seem to represent the performance
+  extremes according to our :ref:`baseline analysis
+  <bob.ip.binseg.results.baselines>`.  You may run analysis on the other models
+  by downloading them from our website (via the ``--weight`` parameter on the
+  :ref:`analyze script <bob.ip.binseg.cli.analyze>`).  The script below may
+  help you with this task, provided you have created a directory structure as
+  suggested by :ref:`our baseline script <bob.ip.binseg.baseline-script>`:
+
+  .. literalinclude:: ../../scripts/xtest.sh
+     :language: bash
+
+
+DRIU
+^^^^
+
+
+.. list-table::
+   :header-rows: 1
+
+   * - Model / X-Test
+     - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.xtest>`
+     - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.xtest>`
+     - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.xtest>`
+     - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.xtest>`
+     - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel_xtest>`
+   * - `drive <baselines_driu_drive_>`_
+     -
+     -
+     -
+     -
+     -
+   * - `stare <baselines_driu_stare_>`_
+     -
+     -
+     -
+     -
+     -
+   * - `chasedb1 <baselines_driu_chase_>`_
+     -
+     -
+     -
+     -
+     -
+   * - `hrf <baselines_driu_hrf_>`_
+     -
+     -
+     -
+     -
+     -
+   * - `iostar-vessel <baselines_driu_iostar_>`_
+     -
+     -
+     -
+     -
+     -
+
+
+Precision-Recall (PR) Curves
+----------------------------
+
+
+.. include:: ../../links.rst