diff --git a/.gitignore b/.gitignore index 777eb99ff50db820cb3e6f4b9d21a4093112cc15..294c3e81810b240542b0e3e4eb9d83e466763675 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,6 @@ output .ipynb_checkpoints */.ipynb_checkpoints/* submitted.sql3 -logs/ -results/ +/logs/ +/results/ .coverage diff --git a/doc/results/baselines/chasedb1.pdf b/doc/results/baselines/chasedb1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3139798feaa762bc1c9a11e8a6716a5ae21e9a63 Binary files /dev/null and b/doc/results/baselines/chasedb1.pdf differ diff --git a/doc/results/baselines/chasedb1.png b/doc/results/baselines/chasedb1.png new file mode 100644 index 0000000000000000000000000000000000000000..0f760b2a097c2b42fa0e067a5a9920d0a525788c Binary files /dev/null and b/doc/results/baselines/chasedb1.png differ diff --git a/doc/results/baselines/drive.pdf b/doc/results/baselines/drive.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3fba78c86bf8c0f20d8ac2612b87a67cdad0b20b Binary files /dev/null and b/doc/results/baselines/drive.pdf differ diff --git a/doc/results/baselines/drive.png b/doc/results/baselines/drive.png new file mode 100644 index 0000000000000000000000000000000000000000..9ecae7f663dde499a0cd3999e0aeaecf17aab778 Binary files /dev/null and b/doc/results/baselines/drive.png differ diff --git a/doc/results/baselines/hrf.pdf b/doc/results/baselines/hrf.pdf new file mode 100644 index 0000000000000000000000000000000000000000..03bd6092d55df80b9f35c04e23caec13828d80f3 Binary files /dev/null and b/doc/results/baselines/hrf.pdf differ diff --git a/doc/results/baselines/hrf.png b/doc/results/baselines/hrf.png new file mode 100644 index 0000000000000000000000000000000000000000..1608a3e7fdcab0bac4321b85945eb6766abb5827 Binary files /dev/null and b/doc/results/baselines/hrf.png differ diff --git a/doc/results/baselines/iostar-vessel.pdf b/doc/results/baselines/iostar-vessel.pdf new file mode 100644 index 
0000000000000000000000000000000000000000..141ea565273373311ca3960cdb994ba2f2c6f58c Binary files /dev/null and b/doc/results/baselines/iostar-vessel.pdf differ diff --git a/doc/results/baselines/iostar-vessel.png b/doc/results/baselines/iostar-vessel.png new file mode 100644 index 0000000000000000000000000000000000000000..7da4802492e1eac053166080e5b19090c2499f59 Binary files /dev/null and b/doc/results/baselines/iostar-vessel.png differ diff --git a/doc/results/baselines/stare.pdf b/doc/results/baselines/stare.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8bafd654a22dac56458355e048ede26ce4ce7cd1 Binary files /dev/null and b/doc/results/baselines/stare.pdf differ diff --git a/doc/results/baselines/stare.png b/doc/results/baselines/stare.png new file mode 100644 index 0000000000000000000000000000000000000000..7a632af2907d4c0f4d48d23072dcc97c189cf83f Binary files /dev/null and b/doc/results/baselines/stare.png differ diff --git a/doc/results/xtest/index.rst b/doc/results/xtest/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..37e5acb1dcd01afa7693c3300ea26d91a9f8f68d --- /dev/null +++ b/doc/results/xtest/index.rst @@ -0,0 +1,90 @@ +.. -*- coding: utf-8 -*- + +.. _bob.ip.binseg.results.xtest: + +====================== + Cross-Database Tests +====================== + +F1 Scores (micro-level) +----------------------- + +* Benchmark results for models: DRIU, HED, M2U-Net and U-Net. +* Models are trained and tested on the same dataset (numbers in parentheses + indicate number of parameters per model), and then evaluated across the test + sets of other datasets. +* You can cross-check the analysis numbers provided in this table by + downloading this software package, the raw data, and running ``bob binseg + analyze`` providing the model URL as ``--weight`` parameter, and then the + ``-xtest`` resource variant of the dataset the model was trained on. 
For + example, to run cross-evaluation tests for the DRIVE dataset, use the + configuration resource :py:mod:`drive-xtest + <bob.ip.binseg.configs.datasets.drive.xtest>`. Otherwise, we + also provide `CSV files + <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/xtest/>`_ + with the estimated performance per threshold (100 steps) per subset. +* For comparison purposes, we provide "second-annotator" performances on the + same test set, where available. +* We only show results for DRIU (~15.4 million parameters) and M2U-Net (~550 + thousand parameters) as these models seem to represent the performance + extremes according to our :ref:`baseline analysis + <bob.ip.binseg.results.baselines>`. You may run analysis on the other models + by downloading them from our website (via the ``--weight`` parameter on the + :ref:`analyze script <bob.ip.binseg.cli.analyze>`). This script may help you + in this task, provided you created a directory structure as suggested by + :ref:`our baseline script <bob.ip.binseg.baseline-script>`: + + .. literalinclude:: ../../scripts/xtest.sh + :language: bash + + +DRIU +==== + + +.. list-table:: + :header-rows: 1 + + * - Model / X-Test + - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.xtest>` + - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.xtest>` + - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.xtest>` + - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.xtest>` + - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel_xtest>` + * - `drive <baselines_driu_drive_>`_ + - + - + - + - + - + * - `stare <baselines_driu_stare_>`_ + - + - + - + - + - + * - `chasedb1 <baselines_driu_chase_>`_ + - + - + - + - + - + * - `hrf <baselines_driu_hrf_>`_ + - + - + - + - + - + * - `iostar-vessel <baselines_driu_iostar_>`_ + - + - + - + - + - + + +Precision-Recall (PR) Curves +---------------------------- + + +.. include:: ../../links.rst