diff --git a/MANIFEST.in b/MANIFEST.in index cf1d827b4de456cfd9faa016ac18001948a7caf3..9f9ab6b6edc5c72b48a8a4002ffb71f210e95bd4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include README.rst buildout.cfg COPYING version.txt requirements.txt -recursive-include doc *.rst *.png *.ico *.txt +recursive-include doc *.sh *.rst *.png *.pdf *.ico *.txt recursive-include bob *.json *.png diff --git a/bob/ip/binseg/configs/datasets/chasedb1/xtest.py b/bob/ip/binseg/configs/datasets/chasedb1/xtest.py new file mode 100644 index 0000000000000000000000000000000000000000..cade7b85c1a4f600fd1edcc8558e326c6a4c8d10 --- /dev/null +++ b/bob/ip/binseg/configs/datasets/chasedb1/xtest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# coding=utf-8 + +"""CHASE-DB1 cross-evaluation dataset +""" + +from bob.ip.binseg.configs.datasets.drive.default import dataset as _drive +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _stare +from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( + dataset as _chase, +) +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _hrf +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _iostar + +dataset = { + "train": _chase["train"], + "test": _chase["test"], + "drive": _drive["test"], + "stare": _stare["test"], + "hrf": _hrf["test"], + "iostar": _iostar["test"], + } diff --git a/bob/ip/binseg/configs/datasets/drive/xtest.py b/bob/ip/binseg/configs/datasets/drive/xtest.py new file mode 100644 index 0000000000000000000000000000000000000000..188606b2ce50482feed9624bd40cc625ac8c38b2 --- /dev/null +++ b/bob/ip/binseg/configs/datasets/drive/xtest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# coding=utf-8 + +"""DRIVE cross-evaluation dataset +""" + +from bob.ip.binseg.configs.datasets.drive.default import dataset as _drive +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _stare +from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( + dataset as _chase, +) +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _hrf +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _iostar + +dataset = { + "train": _drive["train"], + "test": _drive["test"], + "stare": _stare["test"], + "chasedb1": _chase["test"], + "hrf": _hrf["test"], + "iostar": _iostar["test"], + } diff --git a/bob/ip/binseg/configs/datasets/hrf/xtest.py b/bob/ip/binseg/configs/datasets/hrf/xtest.py new file mode 100644 index 0000000000000000000000000000000000000000..6f96074fb9709f38d4ce89296e242e065520676f --- /dev/null +++ b/bob/ip/binseg/configs/datasets/hrf/xtest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# coding=utf-8 + +"""HRF cross-evaluation dataset +""" + +from bob.ip.binseg.configs.datasets.drive.default import dataset as _drive +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _stare +from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( + dataset as _chase, +) +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _hrf +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _iostar + +dataset = { + "train": _hrf["train"], + "test": _hrf["test"], + "drive": _drive["test"], + "stare": _stare["test"], + "chasedb1": _chase["test"], + "iostar": _iostar["test"], + } diff --git a/bob/ip/binseg/configs/datasets/iostar/vessel_xtest.py b/bob/ip/binseg/configs/datasets/iostar/vessel_xtest.py new file mode 100644 index 0000000000000000000000000000000000000000..0d6272751ee4dc916c6e46f0b55209e1263f4190 --- /dev/null +++ 
b/bob/ip/binseg/configs/datasets/iostar/vessel_xtest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# coding=utf-8 + +"""IOSTAR vessel cross-evaluation dataset +""" + +from bob.ip.binseg.configs.datasets.drive.default import dataset as _drive +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _stare +from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( + dataset as _chase, +) +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _hrf +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _iostar + +dataset = { + "train": _iostar["train"], + "test": _iostar["test"], + "drive": _drive["test"], + "stare": _stare["test"], + "chasedb1": _chase["test"], + "hrf": _hrf["test"], + } diff --git a/bob/ip/binseg/configs/datasets/stare/xtest.py b/bob/ip/binseg/configs/datasets/stare/xtest.py new file mode 100644 index 0000000000000000000000000000000000000000..dcd773e872ac3eaeb49b2737e0e6d78c18578d55 --- /dev/null +++ b/bob/ip/binseg/configs/datasets/stare/xtest.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# coding=utf-8 + +"""STARE cross-evaluation dataset +""" + +from bob.ip.binseg.configs.datasets.drive.default import dataset as _drive +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _stare +from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( + dataset as _chase, +) +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _hrf +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _iostar + +dataset = { + "train": _stare["train"], + "test": _stare["test"], + "drive": _drive["test"], + "chasedb1": _chase["test"], + "hrf": _hrf["test"], + "iostar": _iostar["test"], + } diff --git a/bob/ip/binseg/script/analyze.py b/bob/ip/binseg/script/analyze.py index b18fe0f82448c4aa81afdde20913a30477787b72..bd66611d635c5a31b7163c0b69eb9da1ee5e955e 100644 --- a/bob/ip/binseg/script/analyze.py +++ b/bob/ip/binseg/script/analyze.py @@ -136,10 +136,9 @@ def analyze( This script is just a wrapper around the individual scripts for running prediction and evaluating FCN models. It organises the output in a - preset way: - - .. code-block:: text + preset way:: +\b └─ <output-folder>/ ├── predictions/ #the prediction outputs for the train/test set ├── overlayed/ #the overlayed outputs for the train/test set diff --git a/bob/ip/binseg/script/experiment.py b/bob/ip/binseg/script/experiment.py index db17008c1ff4ddc711cb834725d4b96fffd5d71f..cbbfd56f0754327b6bb93abde03b4718c387d930 100644 --- a/bob/ip/binseg/script/experiment.py +++ b/bob/ip/binseg/script/experiment.py @@ -233,10 +233,9 @@ def experiment( This script is just a wrapper around the individual scripts for training, running prediction, evaluating and comparing FCN model performance. It - organises the output in a preset way: - - .. 
code-block:: text + organises the output in a preset way:: +\b └─ <output-folder>/ ├── model/ #the generated model will be here ├── predictions/ #the prediction outputs for the train/test set diff --git a/bob/ip/binseg/script/predict.py b/bob/ip/binseg/script/predict.py index d06c7557080fda133d1a3fa1e621e672667a49e3..14c9cd7495d05aff04a6edd8f3e85be9ef1b6129 100644 --- a/bob/ip/binseg/script/predict.py +++ b/bob/ip/binseg/script/predict.py @@ -140,6 +140,8 @@ def predict(output_folder, model, dataset, batch_size, device, weight, logger.info(f"Skipping dataset '{k}' (not to be evaluated)") continue + logger.info(f"Running inference on '{k}' set...") + data_loader = DataLoader( dataset=v, batch_size=batch_size, diff --git a/bob/ip/binseg/utils/metric.py b/bob/ip/binseg/utils/metric.py index d38e80df89a3c225e31a5b4ce7a9e6c930a4f81c..903836f6ef17b231ecb942efc4156c3408cff885 100644 --- a/bob/ip/binseg/utils/metric.py +++ b/bob/ip/binseg/utils/metric.py @@ -30,7 +30,7 @@ class SmoothedValue: def base_metrics(tp, fp, tn, fn): """ Calculates Precision, Recall (=Sensitivity), Specificity, Accuracy, Jaccard and F1-score (Dice) - + Parameters ---------- @@ -39,7 +39,7 @@ def base_metrics(tp, fp, tn, fn): True positives fp : float - False positives + False positives tn : float True negatives @@ -52,7 +52,7 @@ def base_metrics(tp, fp, tn, fn): ------- metrics : list - + """ precision = tp / (tp + fp + ((tp + fp) == 0)) recall = tp / (tp + fn + ((tp + fn) == 0)) @@ -62,3 +62,26 @@ def base_metrics(tp, fp, tn, fn): f1_score = (2.0 * tp) / (2.0 * tp + fp + fn + ((2.0 * tp + fp + fn) == 0)) # f1_score = (2.0 * precision * recall) / (precision + recall) return [precision, recall, specificity, accuracy, jaccard, f1_score] + + +def auc(precision, recall): + """Calculates the area under the precision-recall curve (AUC) + + .. 
todo:: Integrate this to metrics reporting in compare.py + """ + + rec_unique, rec_unique_ndx = numpy.unique(recall, return_index=True) + + prec_unique = precision[rec_unique_ndx] + + if rec_unique.shape[0] > 1: + prec_interp = numpy.interp( + numpy.arange(0, 1, 0.01), + rec_unique, + prec_unique, + left=0.0, + right=0.0, + ) + return prec_interp.sum() * 0.01 + + return 0.0 diff --git a/conda/meta.yaml b/conda/meta.yaml index d696e7e205fb3a980aa1d0f928a306f1a98089e0..4685c1b569c4be237642cdfcb5da41f1b8d77d58 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -60,6 +60,7 @@ test: - sphinx - sphinx_rtd_theme - sphinxcontrib-programoutput + - graphviz about: summary: Binary Segmentation Benchmark Package for Bob diff --git a/doc/api.rst b/doc/api.rst index edd9b150da9a768053ad87ba5a630bbec04a7f70..c73aedd230a6c441868f46e6d6e5009e04a79be3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -137,26 +137,31 @@ Datasets bob.ip.binseg.configs.datasets.chasedb1.first_annotator bob.ip.binseg.configs.datasets.chasedb1.second_annotator + bob.ip.binseg.configs.datasets.chasedb1.xtest bob.ip.binseg.configs.datasets.chasedb1.covd bob.ip.binseg.configs.datasets.chasedb1.ssl bob.ip.binseg.configs.datasets.drive.default bob.ip.binseg.configs.datasets.drive.second_annotator + bob.ip.binseg.configs.datasets.drive.xtest bob.ip.binseg.configs.datasets.drive.covd bob.ip.binseg.configs.datasets.drive.ssl bob.ip.binseg.configs.datasets.hrf.default + bob.ip.binseg.configs.datasets.hrf.xtest bob.ip.binseg.configs.datasets.hrf.default_fullres bob.ip.binseg.configs.datasets.hrf.covd bob.ip.binseg.configs.datasets.hrf.ssl bob.ip.binseg.configs.datasets.iostar.vessel + bob.ip.binseg.configs.datasets.iostar.vessel_xtest bob.ip.binseg.configs.datasets.iostar.optic_disc bob.ip.binseg.configs.datasets.iostar.covd bob.ip.binseg.configs.datasets.iostar.ssl bob.ip.binseg.configs.datasets.stare.ah bob.ip.binseg.configs.datasets.stare.vk + bob.ip.binseg.configs.datasets.stare.xtest bob.ip.binseg.configs.datasets.stare.covd bob.ip.binseg.configs.datasets.stare.ssl diff --git a/doc/baselines.rst b/doc/baselines.rst deleted file mode 100644 index 3c22af04c729976cff1ee549e4139a838f340989..0000000000000000000000000000000000000000 --- a/doc/baselines.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. -*- coding: utf-8 -*- - -.. _bob.ip.binseg.results.baselines: - -=================== - Baseline Results -=================== - -F1 Scores (micro-level) ------------------------ - -* Benchmark results for models: DRIU, HED, M2U-Net and U-Net. -* Models are trained and tested on the same dataset using the - train-test split as indicated in :ref:`bob.ip.binseg.configs.datasets` (i.e., - these are *intra*-datasets tests) -* Standard-deviations across all test images are indicated in brakets -* Database and Model links (table top row and left column) are linked to the - originating configuration files used to obtain these results. -* For some results, the actual deep neural network models are provided (by - clicking on the associated F1 Score). -* Check `our paper`_ for details on the calculation of the F1 Score and standard - deviations. - -.. 
list-table:: - :header-rows: 1 - - * - F1 (std) - - :py:mod:`driu <bob.ip.binseg.configs.models.driu>` - - :py:mod:`hed <bob.ip.binseg.configs.models.hed>` - - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>` - - :py:mod:`unet <bob.ip.binseg.configs.models.unet>` - * - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>` - - `0.810 (0.021) <driu_chasedb1.pth_>`_ - - 0.810 (0.022) - - `0.802 (0.019) <m2unet_chasedb1.pth_>`_ - - 0.812 (0.020) - * - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>` - - `0.820 (0.014) <driu_drive.pth_>`_ - - 0.817 (0.013) - - `0.803 (0.014) <m2unet_drive.pth_>`_ - - 0.822 (0.015) - * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` - - `0.783 (0.055) <driu_hrf.pth_>`_ - - 0.783 (0.058) - - `0.780 (0.057) <m2unet_hrf.pth_>`_ - - 0.788 (0.051) - * - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>` - - `0.825 (0.020) <driu_iostar.pth_>`_ - - 0.825 (0.020) - - `0.817 (0.020) <m2unet_iostar.pth_>`_ - - 0.818 (0.019) - * - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>` - - `0.827 (0.037) <driu_stare.pth_>`_ - - 0.823 (0.037) - - `0.815 (0.041) <m2unet_stare.pth_>`_ - - 0.829 (0.042) - - -.. include:: links.rst diff --git a/doc/cli.rst b/doc/cli.rst index e5b261d6735221d6cfe40e35256ce734e29c4f9b..588cc997c85333e3fd2cf948dd9bd650f26b8a8e 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -91,10 +91,52 @@ You may use this command to locally copy a resource file so you can change it. .. command-output:: bob binseg config copy --help +.. _bob.ip.binseg.cli.combined: + +Running and Analyzing Experiments +--------------------------------- + +These applications run a combined set of steps in one go. They work well with +our preset :ref:`configuration resources <bob.ip.binseg.cli.config.list.all>`. + + +.. _bob.ip.binseg.cli.experiment: + +Running a Full Experiment Cycle +=============================== + +This command can run training, prediction, evaluation and comparison from a +single, multi-step application. + +.. command-output:: bob binseg experiment --help + + +.. _bob.ip.binseg.cli.analyze: + +Running Complete Experiment Analysis +==================================== + +This command can run prediction, evaluation and comparison from a +single, multi-step application. + +.. command-output:: bob binseg analyze --help + + +.. _bob.ip.binseg.cli.single: + +Single-Step Applications +------------------------ + +These applications allow finer control over the experiment cycle. They also +work well with our preset :ref:`configuration resources +<bob.ip.binseg.cli.config.list.all>`, but allow finer control on the input +datasets. + + .. _bob.ip.binseg.cli.train: Training FCNs -------------- +============= Training creates of a new PyTorch_ model. This model can be used for evaluation tests or for inference. @@ -104,8 +146,8 @@ evaluation tests or for inference. .. _bob.ip.binseg.cli.predict: -FCN Inference -------------- +Prediction with FCNs +==================== Inference takes as input a PyTorch_ model and generates output probabilities as HDF5 files. The probability map has the same size as the input and indicates, @@ -118,7 +160,7 @@ from less probable (0.0) to more probable (1.0). .. _bob.ip.binseg.cli.evaluate: FCN Performance Evaluation --------------------------- +========================== Evaluation takes inference results and compares it to ground-truth, generating a series of analysis figures which are useful to understand model performance. 
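As an aside on the new ``auc`` helper added to ``bob/ip/binseg/utils/metric.py`` earlier in this patch: it interpolates precision over a fixed recall grid of 100 points (step 0.01) and approximates the area under the precision-recall curve with a rectangle sum. The snippet below is only a usage sketch, not part of the patch; it assumes the package is importable and uses made-up precision/recall values for illustration.

.. code-block:: python

   import numpy

   from bob.ip.binseg.utils.metric import auc

   # made-up precision/recall pairs, for illustration only
   recall = numpy.array([0.0, 0.25, 0.5, 0.75, 1.0])
   precision = numpy.array([1.0, 0.9, 0.8, 0.6, 0.4])

   # precision is interpolated on numpy.arange(0, 1, 0.01) and the area is
   # approximated as the sum of the interpolated values times 0.01
   print(auc(precision, recall))  # roughly 0.75 for this toy curve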
@@ -129,7 +171,7 @@ a series of analysis figures which are useful to understand model performance. .. _bob.ip.binseg.cli.compare: Performance Comparison ----------------------- +====================== Performance comparison takes the performance evaluation results and generate combined figures and tables that compare results of multiple systems. @@ -137,15 +179,4 @@ combined figures and tables that compare results of multiple systems. .. command-output:: bob binseg compare --help -.. _bob.ip.binseg.cli.experiment: - -Running Complete Experiments ----------------------------- - -This command can run training, prediction, evaluation and comparison from a -single, multi-step application. - -.. command-output:: bob binseg experiment --help - - .. include:: links.rst diff --git a/doc/experiment.rst b/doc/experiment.rst index a3f2594426e01b3bed29b3eb92117f2a51201adf..9050af004419e2ca7fb3a2d41a72c245bcc448eb 100644 --- a/doc/experiment.rst +++ b/doc/experiment.rst @@ -8,9 +8,9 @@ We provide an :ref:`aggregator command called "experiment" <bob.ip.binseg.cli.experiment>` that runs training, followed by prediction, -evaluation and comparison. After running, you -will be able to find results from model fitting, prediction, evaluation and -comparison under a single output directory. +evaluation and comparison. After running, you will be able to find results +from model fitting, prediction, evaluation and comparison under a single output +directory. For example, to train a Mobile V2 U-Net architecture on the STARE dataset, evaluate both train and test set performances, output prediction maps and @@ -20,3 +20,173 @@ overlay analysis, together with a performance curve, run the following: $ bob binseg experiment -vv m2unet stare --batch-size=16 --overlayed # check results in the "results" folder + +You may run the system on a GPU by using the ``--device=cuda:0`` option. + + +Using your own dataset +====================== + +To use your own dataset, we recommend you read our instructions at +:py:mod:`bob.ip.binseg.configs.datasets.csv`, and set up one or more CSV files +describing input data and ground-truth (segmentation maps), and potential test +data. Then, prepare a configuration file by copying our configuration example +and editing it to apply the required transforms to your input data. Once you are +happy with the result, use it in place of one of our datasets: + +.. code-block:: sh + + $ bob binseg config copy csv-dataset-example mydataset.py + # edit mydataset following instructions + $ bob binseg train ... mydataset.py ... + + +Baseline Benchmarks +=================== + +The following table describes recommended batch sizes for a GPU card with +24Gb of RAM, for supervised training of baselines. Use it like this: + +.. code-block:: sh + + # change <model> and <dataset> to one of the items below + $ bob binseg experiment -vv <model> <dataset> --batch-size=<see-table> --device="cuda:0" + # check results in the "results" folder + +..
list-table:: + + * - **Models / Datasets** + - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>` + - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>` + - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>` + - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>` + - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` + * - :py:mod:`unet <bob.ip.binseg.configs.models.unet>` + - 4 + - 2 + - 2 + - 2 + - 1 + * - :py:mod:`hed <bob.ip.binseg.configs.models.hed>` + - 8 + - 4 + - 4 + - 4 + - 1 + * - :py:mod:`driu <bob.ip.binseg.configs.models.driu>` / :py:mod:`driu-bn <bob.ip.binseg.configs.models.driu_bn>` + - 8 + - 5 + - 4 + - 4 + - 1 + * - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>` + - 16 + - 6 + - 6 + - 6 + - 1 + + +.. tip:: + + Instead of the default configurations, you can pass the full path of your + customized dataset and model files. You may :ref:`copy any of the existing + configuration resources <bob.ip.binseg.cli.config.copy>` and change them + locally. Once you're happy, you may use the newly created files directly on + your command line. For example, suppose you wanted to slightly change the + DRIVE pre-processing pipeline. You could do the following: + + .. code-block:: bash + + $ bob binseg config copy drive my_drive_remix.py + # edit my_drive_remix.py to your needs + $ bob binseg train -vv <model> ./my_drive_remix.py + + +.. _bob.ip.binseg.gridtk-tip: + +.. tip:: + + If you are at Idiap, you may install the package ``gridtk`` (``conda install + gridtk``) on your environment, and submit the job like this: + + .. code-block:: sh + + $ jman submit --queue=gpu --memory=24G --name=myjob -- bob binseg train --device='cuda:0' ... #paste the rest of the command-line + +.. _bob.ip.binseg.baseline-script: + +The :download:`following shell script <scripts/baselines.sh>` can run the +various baselines described above and place results in a single directory: + +.. literalinclude:: scripts/baselines.sh + :language: bash + +You will find results obtained running these baselines :ref:`further in this +guide <bob.ip.binseg.results.baselines>`. + + +Combined Vessel Dataset (COVD) +============================== + +The following table describes recommended batch sizes for 24Gb of RAM GPU card, +for supervised training of COVD- systems. Use it like this: + +.. code-block:: sh + + # change <model> and <dataset> by one of items bellow + $ bob binseg train -vv <model> <dataset> --batch-size=<see-table> --device="cuda:0" + +.. list-table:: + + * - **Models / Datasets** + - :py:mod:`drive-covd <bob.ip.binseg.configs.datasets.drive.covd>` + - :py:mod:`stare-covd <bob.ip.binseg.configs.datasets.stare.covd>` + - :py:mod:`chasedb1-covd <bob.ip.binseg.configs.datasets.chasedb1.covd>` + - :py:mod:`iostar-vessel-covd <bob.ip.binseg.configs.datasets.iostar.covd>` + - :py:mod:`hrf-covd <bob.ip.binseg.configs.datasets.hrf.covd>` + * - :py:mod:`driu <bob.ip.binseg.configs.models.driu>` / :py:mod:`driu-bn <bob.ip.binseg.configs.models.driu_bn>` + - 4 + - 4 + - 2 + - 2 + - 2 + * - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>` + - 8 + - 4 + - 4 + - 4 + - 4 + + +Combined Vessel Dataset (COVD) and Semi-Supervised Learning (SSL) +================================================================= + +The following table describes recommended batch sizes for 24Gb of RAM GPU +card, for semi-supervised learning of COVD- systems. Use it like this: + +.. 
code-block:: sh + + # change <model> and <dataset> by one of items bellow + $ bob binseg train -vv --ssl <model> <dataset> --batch-size=<see-table> --device="cuda:0" + +.. list-table:: + + * - **Models / Datasets** + - :py:mod:`drive-ssl <bob.ip.binseg.configs.datasets.drive.ssl>` + - :py:mod:`stare-ssl <bob.ip.binseg.configs.datasets.stare.ssl>` + - :py:mod:`chasedb1-ssl <bob.ip.binseg.configs.datasets.chasedb1.ssl>` + - :py:mod:`iostar-vessel-ssl <bob.ip.binseg.configs.datasets.iostar.ssl>` + - :py:mod:`hrf-ssl <bob.ip.binseg.configs.datasets.hrf.ssl>` + * - :py:mod:`driu-ssl <bob.ip.binseg.configs.models.driu_ssl>` / :py:mod:`driu-bn-ssl <bob.ip.binseg.configs.models.driu_bn_ssl>` + - 4 + - 4 + - 2 + - 1 + - 1 + * - :py:mod:`m2unet-ssl <bob.ip.binseg.configs.models.m2unet_ssl>` + - 4 + - 4 + - 2 + - 2 + - 2 diff --git a/doc/framework.dot b/doc/framework.dot new file mode 100644 index 0000000000000000000000000000000000000000..50bfce5a2ff0030ee75c0138efd884c4e8f3228b --- /dev/null +++ b/doc/framework.dot @@ -0,0 +1,60 @@ +digraph framework { + + graph [ + rankdir=LR, + ]; + edge [ + fontname=Helvetica, + fontsize=12, + fontcolor=blue, + minlen=2, + labeldistance=2.5, + ]; + + node [ + fontname=Helvetica, + fontsize=12, + fontcolor=black, + shape=record, + style="filled,rounded", + fillcolor=grey92, + ]; + + dataset [ + label="<train>\nTraining\n\n|<test>\nTest\n\n", + fillcolor=yellow, + style="filled", + ]; + + {rank = min; dataset;} + + subgraph cluster_experiment { + label=<<b>experiment</b>>; + shape=record; + style="filled,rounded"; + fillcolor=white; + train; + + subgraph cluster_analyze { + label=<<b>analyze</b>>; + predict; + evaluate; + compare; + } + } + + figure, table [ + fillcolor=lightblue, + style="filled", + ]; + {rank = max; figure; table; } + + dataset:train -> train [headlabel="sample + label",labelangle=30]; + dataset:test -> predict [headlabel="sample",labelangle=30]; + train -> predict [headlabel="model"]; + dataset:test -> evaluate [headlabel="label"]; + predict -> evaluate [headlabel="probabilities ",labelangle=-30]; + evaluate -> compare [headlabel="metrics"]; + compare -> figure; + compare -> table; +} diff --git a/doc/index.rst b/doc/index.rst index 0cccd77fa80845cdee84d195c67b61c586265050..cae4c4d0bb7680f950583d8d40ac39c6ccd114d2 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -43,7 +43,7 @@ User Guide setup usage - results + results/index acknowledgements references datasets diff --git a/doc/results/baselines/index.rst b/doc/results/baselines/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..64a361c1ef947bcfdfb74e9be4a3f940f75b20a6 --- /dev/null +++ b/doc/results/baselines/index.rst @@ -0,0 +1,118 @@ +.. -*- coding: utf-8 -*- + +.. _bob.ip.binseg.results.baselines: + +=================== + Baseline Results +=================== + +F1 Scores (micro-level) +----------------------- + +* Benchmark results for models: DRIU, HED, M2U-Net and U-Net. +* Models are trained and tested on the same dataset (numbers in parenthesis + indicate number of parameters per model) +* Database and model resource configuration links (table top row and left + column) are linked to the originating configuration files used to obtain + these results. +* Check `our paper`_ for details on the calculation of the F1 Score and standard + deviations (in parentheses). 
+* Single performance numbers correspond to *a priori* performance indicators, + where the threshold is previously selected on the training set +* You can cross check the analysis numbers provided in this table by + downloading this software package, the raw data, and running ``bob binseg + analyze`` providing the model URL as ``--weight`` parameter. Otherwise, we + also provide `CSV files + <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/>`_ + with the estimated performance per threshold (100 + steps) per subset. +* For comparison purposes, we provide "second-annotator" performances on the + same test set, where available. + + +.. list-table:: + :header-rows: 1 + + * - Dataset + - 2nd. Annot. + - :py:mod:`driu (15M) <bob.ip.binseg.configs.models.driu>` + - :py:mod:`hed (14.7M) <bob.ip.binseg.configs.models.hed>` + - :py:mod:`m2unet (0.55M) <bob.ip.binseg.configs.models.m2unet>` + - :py:mod:`unet (25.8M) <bob.ip.binseg.configs.models.unet>` + * - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>` + - 0.788 (0.021) + - `0.819 (0.016) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/drive/model.pth>`_ + - `0.806 (0.015) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/drive/model.pth>`_ + - `0.804 (0.014) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/drive/model.pth>`_ + - `0.823 (0.015) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/drive/model.pth>`_ + * - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>` + - 0.759 (0.028) + - `0.824 (0.037) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/stare/model.pth>`_ + - `0.810 (0.045) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/stare/model.pth>`_ + - `0.811 (0.039) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/stare/model.pth>`_ + - `0.828 (0.041) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/stare/model.pth>`_ + * - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>` + - 0.768 0.023 + - `0.811 (0.018) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/chasedb1/model.pth>`_ + - `0.806 (0.021) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/chasedb1/model.pth>`_ + - `0.801 (0.018) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/chasedb1/model.pth>`_ + - `0.802 (0.015) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/chasedb1/model.pth>`_ + * - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` + - + - `0.802 (0.039) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/hrf/model.pth>`_ + - `0.793 (0.041) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/hrf/model.pth>`_ + - `0.796 (0.043) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/hrf/model.pth>`_ + - `0.798 (0.038) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/hrf/model.pth>`_ + * - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>` + - + - `0.825 (0.021) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/driu/iostar-vessel/model.pth>`_ + - `0.822 (0.023) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/hed/iostar-vessel/model.pth>`_ + - 
`0.817 (0.021) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/m2unet/iostar-vessel/model.pth>`_ + - `0.818 (0.019) <https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/baselines/unet/iostar-vessel/model.pth>`_ + + +Precision-Recall (PR) Curves +---------------------------- + +Next, you will find the PR plots showing confidence intervals, for the various +models explored, on a per dataset arrangement. All curves correspond to test +set performances. Single performance figures (F1-micro scores) correspond to +its average value across all test set images, for a fixed threshold set to +``0.5``. + +.. figure:: drive.png + :align: center + :alt: Model comparisons for drive datasets + + :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>`: PR curve and F1 scores at T=0.5 (:download:`pdf <drive.pdf>`) + + +.. figure:: stare.png + :align: center + :alt: Model comparisons for stare datasets + + :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>`: PR curve and F1 scores at T=0.5 (:download:`pdf <stare.pdf>`) + + +.. figure:: chasedb1.png + :align: center + :alt: Model comparisons for chasedb1 datasets + + :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>`: PR curve and F1 scores at T=0.5 (:download:`pdf <chasedb1.pdf>`) + + +.. figure:: hrf.png + :align: center + :alt: Model comparisons for hrf datasets + + :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>`: PR curve and F1 scores at T=0.5 (:download:`pdf <hrf.pdf>`) + + +.. figure:: iostar-vessel.png + :align: center + :alt: Model comparisons for iostar-vessel datasets + + :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>`: PR curve and F1 scores at T=0.5 (:download:`pdf <iostar-vessel.pdf>`) + + +.. include:: ../../links.rst diff --git a/doc/covd.rst b/doc/results/covd/index.rst similarity index 95% rename from doc/covd.rst rename to doc/results/covd/index.rst index 4dcb1b9df7609924ccc0f58c9ea40dec447c08aa..84c407bc027d7456953348625eb7addf7522266b 100644 --- a/doc/covd.rst +++ b/doc/results/covd/index.rst @@ -78,38 +78,40 @@ M2U-Net Precision vs. Recall Curves Precision vs. recall curves for each evaluated dataset. Note that here the F1-score is calculated on a macro level (see paper for more details). -.. figure:: img/pr_CHASEDB1.png +.. figure:: pr_CHASEDB1.png :scale: 50 % :align: center :alt: model comparisons CHASE_DB1: Precision vs Recall curve and F1 scores -.. figure:: img/pr_DRIVE.png +.. figure:: pr_DRIVE.png :scale: 50 % :align: center :alt: model comparisons DRIVE: Precision vs Recall curve and F1 scores -.. figure:: img/pr_HRF.png +.. figure:: pr_HRF.png :scale: 50 % :align: center :alt: model comparisons HRF: Precision vs Recall curve and F1 scores -.. figure:: img/pr_IOSTARVESSEL.png +.. figure:: pr_IOSTARVESSEL.png :scale: 50 % :align: center :alt: model comparisons IOSTAR: Precision vs Recall curve and F1 scores -.. figure:: img/pr_STARE.png +.. figure:: pr_STARE.png :scale: 50 % :align: center :alt: model comparisons STARE: Precision vs Recall curve and F1 scores + +.. 
include:: ../../links.rst diff --git a/doc/img/pr_CHASEDB1.png b/doc/results/covd/pr_CHASEDB1.png similarity index 100% rename from doc/img/pr_CHASEDB1.png rename to doc/results/covd/pr_CHASEDB1.png diff --git a/doc/img/pr_DRIVE.png b/doc/results/covd/pr_DRIVE.png similarity index 100% rename from doc/img/pr_DRIVE.png rename to doc/results/covd/pr_DRIVE.png diff --git a/doc/img/pr_HRF.png b/doc/results/covd/pr_HRF.png similarity index 100% rename from doc/img/pr_HRF.png rename to doc/results/covd/pr_HRF.png diff --git a/doc/img/pr_IOSTARVESSEL.png b/doc/results/covd/pr_IOSTARVESSEL.png similarity index 100% rename from doc/img/pr_IOSTARVESSEL.png rename to doc/results/covd/pr_IOSTARVESSEL.png diff --git a/doc/img/pr_STARE.png b/doc/results/covd/pr_STARE.png similarity index 100% rename from doc/img/pr_STARE.png rename to doc/results/covd/pr_STARE.png diff --git a/doc/results.rst b/doc/results/index.rst similarity index 85% rename from doc/results.rst rename to doc/results/index.rst index 0fcc3f46070a8ee710cecc2a966aa8a0410d7236..f2d7e2ac8de271a3745b1de154d0b848ef70806b 100644 --- a/doc/results.rst +++ b/doc/results/index.rst @@ -15,8 +15,9 @@ strategy. .. toctree:: :maxdepth: 2 - baselines - covd + baselines/index + xtest/index + covd/index -.. include:: links.rst +.. include:: ../links.rst diff --git a/doc/scripts/baselines.sh b/doc/scripts/baselines.sh new file mode 100755 index 0000000000000000000000000000000000000000..6f82b1aff351acd0a217a5e84221110483a75b4b --- /dev/null +++ b/doc/scripts/baselines.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +# Runs all of our baselines + +# set output directory and location of "bob" executable +OUTDIR=/path/to/output/directory +BOB=/path/to/bob/executable + +# run <modelconfig> <dbconfig> <batchsize> [<device> [<queue>]] +function run() { + local device="cpu" + [ $# -gt 3 ] && device="${4}" + + local cmd=(${BOB} binseg experiment) + cmd+=("-vv" "--device=${device}" ${1} ${2}) + cmd+=("--batch-size=${3}" "--output-folder=${OUTDIR}/${1}/${2}") + + # notice this assumes gridtk is installed + [ $# -gt 4 ] && cmd=(jman submit "--memory=24G" "--queue=${5}" -- "${cmd[@]}") + + "${cmd[@]}" +} + +# run/submit all baselines +# uncomment the trailing "cuda:0" argument to run on a GPU instead of the CPU +# also uncomment the queue name ("sgpu"/"gpu") to submit the job via gridtk +run m2unet stare 6 #cuda:0 #sgpu +run hed stare 4 #cuda:0 #sgpu +run driu stare 5 #cuda:0 #sgpu +run unet stare 2 #cuda:0 #sgpu +run m2unet drive 16 #cuda:0 #sgpu +run hed drive 8 #cuda:0 #sgpu +run driu drive 8 #cuda:0 #sgpu +run unet drive 4 #cuda:0 #sgpu +run m2unet iostar-vessel 6 #cuda:0 #sgpu +run hed iostar-vessel 4 #cuda:0 #sgpu +run driu iostar-vessel 4 #cuda:0 #sgpu +run unet iostar-vessel 2 #cuda:0 #sgpu +run m2unet chasedb1 6 #cuda:0 #sgpu +run hed chasedb1 4 #cuda:0 #sgpu +run driu chasedb1 4 #cuda:0 #sgpu +run unet chasedb1 2 #cuda:0 #sgpu +run m2unet hrf 1 #cuda:0 #gpu +run hed hrf 1 #cuda:0 #gpu +run driu hrf 1 #cuda:0 #gpu +run unet hrf 1 #cuda:0 #gpu diff --git a/doc/scripts/xtest.sh b/doc/scripts/xtest.sh new file mode 100755 index 0000000000000000000000000000000000000000..5e409854160719dbab01a712a17d7f5cd60c51c9 --- /dev/null +++ b/doc/scripts/xtest.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# Runs cross database tests + +BOB=$HOME/work/bob/bob.ip.binseg/bin/bob + +for d in drive stare chasedb1 iostar-vessel hrf; do + for m in driu hed m2unet unet; do + cmd=(${BOB} binseg analyze -vv ${m} "${d}-xtest") + cmd+=("--weight=${m}/${d}/model/model_final.pth") + cmd+=("--output-folder=${m}/${d}/xtest") + "${cmd[@]}" + done
+done diff --git a/doc/training.rst b/doc/training.rst index 693561286fd8cf4a19ccddc0e75050743b155d7e..254cac14cfafcaf6f67698c2d41d62096c204d14 100644 --- a/doc/training.rst +++ b/doc/training.rst @@ -19,158 +19,3 @@ message containing more detailed instructions. size (``--batch``). -Baseline Benchmarks -=================== - -The following table describes recommended batch sizes for 24Gb of RAM GPU -card, for supervised training of baselines. Use it like this: - -.. code-block:: sh - - # change <model> and <dataset> by one of items bellow - $ bob binseg train -vv <model> <dataset> --batch-size=<see-table> --device="cuda:0" - # check results in the "results" folder - -.. list-table:: - - * - **Models / Datasets** - - :py:mod:`drive <bob.ip.binseg.configs.datasets.drive.default>` - - :py:mod:`stare <bob.ip.binseg.configs.datasets.stare.ah>` - - :py:mod:`chasedb1 <bob.ip.binseg.configs.datasets.chasedb1.first_annotator>` - - :py:mod:`iostar-vessel <bob.ip.binseg.configs.datasets.iostar.vessel>` - - :py:mod:`hrf <bob.ip.binseg.configs.datasets.hrf.default>` - * - :py:mod:`unet <bob.ip.binseg.configs.models.unet>` - - 4 - - 2 - - 2 - - 2 - - 1 - * - :py:mod:`hed <bob.ip.binseg.configs.models.hed>` - - 8 - - 4 - - 4 - - 4 - - 1 - * - :py:mod:`driu <bob.ip.binseg.configs.models.driu>` / :py:mod:`driu-bn <bob.ip.binseg.configs.models.driu_bn>` - - 8 - - 5 - - 4 - - 4 - - 1 - * - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>` - - 16 - - 6 - - 6 - - 6 - - 1 - - -.. tip:: - - Instead of the default configurations, you can pass the full path of your - customized dataset and model files. You may :ref:`copy any of the existing - configuration resources <bob.ip.binseg.cli.config.copy>` and change them - locally. Once you're happy, you may use the newly created files directly on - your command line. For example, suppose you wanted to slightly change the - DRIVE pre-processing pipeline. You could do the following: - - .. code-block:: bash - - $ bob binseg config copy drive my_drive_remix.py - # edit my_drive_remix.py to your needs - $ bob binseg train -vv <model> ./my_drive_remix.py - - -.. _bob.ip.binseg.gridtk-tip: - -.. tip:: - - If you are at Idiap, you may install the package ``gridtk`` (``conda install - gridtk``) on your environment, and submit the job like this: - - .. code-block:: sh - - $ jman submit --queue=gpu --memory=24G --name=myjob -- bob binseg train --device='cuda:0' ... #paste the rest of the command-line - - -Combined Vessel Dataset (COVD) -============================== - -The following table describes recommended batch sizes for 24Gb of RAM GPU card, -for supervised training of COVD- systems. Use it like this: - -.. code-block:: sh - - # change <model> and <dataset> by one of items bellow - $ bob binseg train -vv <model> <dataset> --batch-size=<see-table> --device="cuda:0" - -.. 
list-table:: - - * - **Models / Datasets** - - :py:mod:`drive-covd <bob.ip.binseg.configs.datasets.drive.covd>` - - :py:mod:`stare-covd <bob.ip.binseg.configs.datasets.stare.covd>` - - :py:mod:`chasedb1-covd <bob.ip.binseg.configs.datasets.chasedb1.covd>` - - :py:mod:`iostar-vessel-covd <bob.ip.binseg.configs.datasets.iostar.covd>` - - :py:mod:`hrf-covd <bob.ip.binseg.configs.datasets.hrf.covd>` - * - :py:mod:`driu <bob.ip.binseg.configs.models.driu>` / :py:mod:`driu-bn <bob.ip.binseg.configs.models.driu_bn>` - - 4 - - 4 - - 2 - - 2 - - 2 - * - :py:mod:`m2unet <bob.ip.binseg.configs.models.m2unet>` - - 8 - - 4 - - 4 - - 4 - - 4 - - -Combined Vessel Dataset (COVD) and Semi-Supervised Learning (SSL) -================================================================= - -The following table describes recommended batch sizes for 24Gb of RAM GPU -card, for semi-supervised learning of COVD- systems. Use it like this: - -.. code-block:: sh - - # change <model> and <dataset> by one of items bellow - $ bob binseg train -vv --ssl <model> <dataset> --batch-size=<see-table> --device="cuda:0" - -.. list-table:: - - * - **Models / Datasets** - - :py:mod:`drive-ssl <bob.ip.binseg.configs.datasets.drive.ssl>` - - :py:mod:`stare-ssl <bob.ip.binseg.configs.datasets.stare.ssl>` - - :py:mod:`chasedb1-ssl <bob.ip.binseg.configs.datasets.chasedb1.ssl>` - - :py:mod:`iostar-vessel-ssl <bob.ip.binseg.configs.datasets.iostar.ssl>` - - :py:mod:`hrf-ssl <bob.ip.binseg.configs.datasets.hrf.ssl>` - * - :py:mod:`driu-ssl <bob.ip.binseg.configs.models.driu_ssl>` / :py:mod:`driu-bn-ssl <bob.ip.binseg.configs.models.driu_bn_ssl>` - - 4 - - 4 - - 2 - - 1 - - 1 - * - :py:mod:`m2unet-ssl <bob.ip.binseg.configs.models.m2unet_ssl>` - - 4 - - 4 - - 2 - - 2 - - 2 - - -Using your own dataset -====================== - -To use your own dataset, we recommend you read our instructions at -:py:mod:`bob.ip.binseg.configs.datasets.csv`, and setup one or more CSV file -describing input data and ground-truth (segmentation maps). Then, prepare a -configuration file by copying our configuration example and edit it to apply -the required transforms to your input data. Once you are happy with the -result, use it in place of one of our datasets: - -.. code-block:: sh - - $ bob binseg config copy csv-dataset-example mydataset.py - # edit mydataset following instructions - $ bob binseg train ... mydataset.py ... diff --git a/doc/usage.rst b/doc/usage.rst index c9967139f77c1e64bd634b641bf795ffdea73c55..b63cd4378782167b0c91b194167f30da1add4360 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -13,25 +13,38 @@ semantic binary segmentation with support for the following activities: that is trained to reconstruct annotations (pre-segmented binary maps), automatically, via error back propagation. The objective of this phase is to produce an FCN model. -* Inference: The FCN is used to generate vessel map predictions +* Inference (prediction): The FCN is used to generate vessel map predictions * Evaluation: Vessel map predictions are used evaluate FCN performance against - test data, generate ROC curves or visualize prediction results overlayed on + provided annotations, or visualize prediction results overlayed on the original raw images. +* Comparison: Use evaluation results to compare performance as you like. -Each application is implemented as a :ref:`command-line utility -<bob.ip.binseg.cli>`, that is configurable using :ref:`Bob's extensible -configuration framework <bob.extension.framework>`. 
In essence, each -command-line option may be provided as a variable with the same name in a -Python file. Each file may combine any number of variables that are pertinent -to an application. +Whereas we provide :ref:`command-line interfaces (CLI) +<bob.ip.binseg.cli.single>` that implement each of the phases above, we also +provide command aggregators that can :ref:`run all of the phases +<bob.ip.binseg.cli.combined>`. Both interfaces are configurable using +:ref:`Bob's extensible configuration framework <bob.extension.framework>`. In +essence, each command-line option may be provided as a variable with the same +name in a Python file. Each file may combine any number of variables that are +pertinent to an application. .. tip:: For reproducibility, we recommend you stick to configuration files when parameterizing our CLI. Notice some of the options in the CLI interface (e.g. ``--dataset``) cannot be passed via the actual command-line as it - requires a :py:class:`concrete PyTorch dataset instance - <torch.utils.data.dataset.Dataset>`. + may require complex Python types that cannot be synthesized in a single + input parameter. + + +The following flowchart represents the various experiment phases and output +results that can be produced for each of our CLI interfaces (rounded white +rectangles). Processing subproducts (marked in blue) are stored on disk by +the end of each step. + +.. graphviz:: framework.dot + :caption: Framework actions and CLI + We provide a number of :ref:`preset configuration files <bob.ip.binseg.cli.config.list.all>` that can be used in one or more of the @@ -46,10 +59,10 @@ modifying one of our configuration resources. .. toctree:: :maxdepth: 2 + experiment training models evaluation - experiment .. include:: links.rst diff --git a/setup.py b/setup.py index 35c6ff669fa31b2c44fc333b187f317182e17234..11b08d12091a66218622e888af7ac1911f04e8a9 100644 --- a/setup.py +++ b/setup.py @@ -61,23 +61,27 @@ setup( # drive dataset "drive = bob.ip.binseg.configs.datasets.drive.default", "drive-2nd = bob.ip.binseg.configs.datasets.drive.second_annotator", + "drive-xtest = bob.ip.binseg.configs.datasets.drive.xtest", "drive-covd = bob.ip.binseg.configs.datasets.drive.covd", "drive-ssl = bob.ip.binseg.configs.datasets.drive.ssl", # stare dataset "stare = bob.ip.binseg.configs.datasets.stare.ah", "stare-2nd = bob.ip.binseg.configs.datasets.stare.vk", + "stare-xtest = bob.ip.binseg.configs.datasets.stare.xtest", "stare-covd = bob.ip.binseg.configs.datasets.stare.covd", "stare-ssl = bob.ip.binseg.configs.datasets.stare.ssl", # iostar "iostar-vessel = bob.ip.binseg.configs.datasets.iostar.vessel", + "iostar-vessel-xtest = bob.ip.binseg.configs.datasets.iostar.vessel_xtest", "iostar-disc = bob.ip.binseg.configs.datasets.iostar.optic_disc", "iostar-vessel-covd = bob.ip.binseg.configs.datasets.iostar.covd", "iostar-vessel-ssl = bob.ip.binseg.configs.datasets.iostar.ssl", # hrf "hrf = bob.ip.binseg.configs.datasets.hrf.default", + "hrf-xtest = bob.ip.binseg.configs.datasets.hrf.xtest", "hrf-highres = bob.ip.binseg.configs.datasets.hrf.default_fullres", "hrf-covd = bob.ip.binseg.configs.datasets.hrf.covd", "hrf-ssl = bob.ip.binseg.configs.datasets.hrf.ssl", @@ -85,6 +89,7 @@ setup( # chase-db1 "chasedb1 = bob.ip.binseg.configs.datasets.chasedb1.first_annotator", "chasedb1-2nd = bob.ip.binseg.configs.datasets.chasedb1.second_annotator", + "chasedb1-xtest = bob.ip.binseg.configs.datasets.chasedb1.xtest", "chasedb1-covd = bob.ip.binseg.configs.datasets.chasedb1.covd", "chasedb1-ssl = 
bob.ip.binseg.configs.datasets.chasedb1.ssl",
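The ``*-xtest`` resources registered above each expose a single ``dataset`` dictionary: the ``train`` and ``test`` keys come from the base database, and every other key holds the test split of one of the remaining databases. Below is a usage sketch only (not part of the patch), assuming the package and the underlying raw data are installed and that each split is a sized, map-style dataset, as the ``DataLoader`` usage in ``bob/ip/binseg/script/predict.py`` suggests.

.. code-block:: python

   from bob.ip.binseg.configs.datasets.drive.xtest import dataset

   # walk the cross-evaluation dictionary, skipping the training split,
   # mirroring how the prediction script iterates over evaluation sets
   for name, split in dataset.items():
       if name == "train":
           continue
       print(f"{name}: {len(split)} samples")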