diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bfaf903f06ccf2d5dca4cf9e853db06e01b53ae4..ceeefc7bd2643ffe37fc752990cccd4197811811 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,3 +6,6 @@ include: - project: biosignal/software/dev-profile file: /gitlab/python.yml + +tests: + before_script: diff --git a/MANIFEST.in b/MANIFEST.in index 8c3a16b9a0d991e865e4aefed31ec4dde3bdcd75..321d5167dc635f2831c6a89df7a2bcb436da3e81 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -recursive-include doc *.rst *.ico *.png +recursive-include doc *.rst *.png recursive-include tests *.py *.png *.csv *.json recursive-include src/ptbench/data *.json.bz2 diff --git a/doc/_static/style.css b/doc/_static/style.css new file mode 100644 index 0000000000000000000000000000000000000000..b07bdb1b51a79acbb63d2a9b62860153ebf3741b --- /dev/null +++ b/doc/_static/style.css @@ -0,0 +1,3 @@ +.wy-nav-content { + max-width: none; +} diff --git a/doc/_templates/config.rst b/doc/_templates/config.rst new file mode 100644 index 0000000000000000000000000000000000000000..3a1f3d38f834a9180521eb514aea2a5eef4a85b9 --- /dev/null +++ b/doc/_templates/config.rst @@ -0,0 +1,3 @@ +{% include "autosummary/module.rst" %} +.. literalinclude:: ../../../../src/{{ fullname.replace(".", "/") }}.py + :start-at: import diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html new file mode 100644 index 0000000000000000000000000000000000000000..3e44f4a314727bea575cba269197f9a44584cf85 --- /dev/null +++ b/doc/_templates/layout.html @@ -0,0 +1,4 @@ +{% extends "!layout.html" %} +{% block extrahead %} + <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css"> +{% endblock %} diff --git a/doc/api.rst b/doc/api.rst index f00cde9b55af71a77110b45ed6df27db5d731109..7314356253fefa05fc6de69fadefccbd61471aca 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -12,10 +12,104 @@ This section includes information for using the Python API of ``ptbench``. +.. _ptbench.api.data: + +Data Methods +------------ + +Auxiliary classes and methods to define raw dataset iterators. + +.. autosummary:: + :toctree: api/data + + ptbench.data.sample + ptbench.data.dataset + ptbench.data.utils + ptbench.data.loader + ptbench.data.transforms + ptbench.configs.datasets + + +.. _ptbench.api.data.raw: + +Raw Dataset Access +------------------ + +Direct data-access through iterators. + +.. autosummary:: + :toctree: api/data/raw + + ptbench.data.hivtb_RS + ptbench.data.tbpoc + ptbench.data.montgomery_RS + ptbench.data.padchest + ptbench.data.hivtb + ptbench.data.indian_RS + ptbench.data.shenzhen_RS + ptbench.data.tbpoc_RS + ptbench.data.shenzhen + ptbench.data.montgomery + ptbench.data.indian + ptbench.data.nih_cxr14_re + ptbench.data.padchest_RS + + +.. _ptbench.api.models: + +Models +------ + +CNN and other models implemented. + +.. autosummary:: + :toctree: api/models + + ptbench.models.alexnet + ptbench.models.densenet + ptbench.models.densenet_rs + ptbench.models.logistic_regression + ptbench.models.normalizer + ptbench.models.pasa + ptbench.models.signs_to_tb + + +.. _ptbench.api.engines: + +Command engines +--------------- + +Functions to actuate on the data. + +.. autosummary:: + :toctree: api/engine + + ptbench.engine.trainer + ptbench.engine.predictor + ptbench.engine.evaluator + + +.. _ptbench.api.utils: + +Various utilities +----------------- + +Reusable auxiliary functions. + .. 
autosummary:: - :toctree: api + :toctree: api/utils - ptbench + ptbench.utils.checkpointer + ptbench.utils.download + ptbench.utils.grad_cams + ptbench.utils.measure + ptbench.utils.model_serialization + ptbench.utils.model_zoo + ptbench.utils.plot + ptbench.utils.rc + ptbench.utils.resources + ptbench.utils.summary + ptbench.utils.table .. include:: links.rst diff --git a/doc/catalog.json b/doc/catalog.json new file mode 100644 index 0000000000000000000000000000000000000000..3561c742e9c93e50e3d2c9b35354c3c0f315c87a --- /dev/null +++ b/doc/catalog.json @@ -0,0 +1,9 @@ +{ + "exposed": { + "versions": { + "stable": "https://www.idiap.ch/software/bob/docs/bob/exposed/stable/sphinx/", + "latest": "https://www.idiap.ch/software/bob/docs/bob/exposed/main/sphinx/" + }, + "sources": {} + } +} diff --git a/doc/cli.rst b/doc/cli.rst new file mode 100644 index 0000000000000000000000000000000000000000..950e3cdc6182d330d5bdd40586a51eae71560e8a --- /dev/null +++ b/doc/cli.rst @@ -0,0 +1,20 @@ +.. Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +.. +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _ptbench.cli: + +======================== + Command-line Interface +======================== + +This section contains an overview of command-line applications shipped with +this package. + + +.. click:: ptbench.scripts.cli:cli + :prog: ptbench + :nested: full + + +.. include:: links.rst diff --git a/doc/conf.py b/doc/conf.py index a52fb6a39db44bac9e04a62ed4c17bee8e5b0120..222fc25512302339e3af2167e8eb60fa2ebe795d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -22,6 +22,11 @@ extensions = [ "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", + "auto_intersphinx", + "sphinx_autodoc_typehints", + "sphinx_copybutton", + "sphinx_inline_tabs", + "sphinx_click", ] # Be picky about warnings @@ -109,6 +114,18 @@ autodoc_default_options = { "show-inheritance": True, } -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), -} +auto_intersphinx_packages = [ + "matplotlib", + "numpy", + "pandas", + "pillow", + "psutil", + "torch", + "torchvision", + ("exposed", "latest"), + ("python", "3"), +] +auto_intersphinx_catalog = "catalog.json" + +# Add our private index (for extras and fixes) +intersphinx_mapping = dict(extras=("", "extras.inv")) diff --git a/doc/config.rst b/doc/config.rst new file mode 100644 index 0000000000000000000000000000000000000000..70d73db09eccb8d9c5a7f7fb1c352b6b0b56764f --- /dev/null +++ b/doc/config.rst @@ -0,0 +1,112 @@ +.. Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +.. +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _ptbench.config: + +Preset Configurations +--------------------- + +This module contains preset configurations for baseline CNN architectures and +datasets. + + +Models +====== + +.. autosummary:: + :toctree: api/configs/models + :template: config.rst + + ptbench.configs.models.alexnet + ptbench.configs.models.alexnet_pretrained + ptbench.configs.models.densenet + ptbench.configs.models.densenet_pretrained + ptbench.configs.models.logistic_regression + ptbench.configs.models.pasa + ptbench.configs.models.signs_to_tb + ptbench.configs.models_datasets.densenet_rs + + +.. _ptbench.configs.datasets: + +Datasets +======== + +Datasets include iterative accessors to raw data +(:ref:`ptbench.setup.datasets`) including data pre-processing and augmentation, +if applicable. Use these datasets for training and evaluating your models. + +.. 
autosummary:: + :toctree: api/configs/datasets + :template: config.rst + + ptbench.configs.datasets.indian.default + ptbench.configs.datasets.indian.rgb + ptbench.configs.datasets.indian_RS.default + ptbench.configs.datasets.mc_ch.default + ptbench.configs.datasets.mc_ch.rgb + ptbench.configs.datasets.mc_ch_RS.default + ptbench.configs.datasets.mc_ch_in.default + ptbench.configs.datasets.mc_ch_in.rgb + ptbench.configs.datasets.mc_ch_in_RS.default + ptbench.configs.datasets.mc_ch_in_pc.default + ptbench.configs.datasets.mc_ch_in_pc.rgb + ptbench.configs.datasets.mc_ch_in_pc_RS.default + ptbench.configs.datasets.montgomery.default + ptbench.configs.datasets.montgomery.rgb + ptbench.configs.datasets.montgomery_RS.default + ptbench.configs.datasets.nih_cxr14_re.cardiomegaly_idiap + ptbench.configs.datasets.nih_cxr14_re.default + ptbench.configs.datasets.nih_cxr14_re.idiap + ptbench.configs.datasets.nih_cxr14_re_pc.idiap + ptbench.configs.datasets.padchest.cardiomegaly_idiap + ptbench.configs.datasets.padchest.idiap + ptbench.configs.datasets.padchest.no_tb_idiap + ptbench.configs.datasets.padchest.tb_idiap + ptbench.configs.datasets.padchest.tb_idiap_rgb + ptbench.configs.datasets.padchest_RS.tb_idiap + ptbench.configs.datasets.shenzhen.default + ptbench.configs.datasets.shenzhen.rgb + ptbench.configs.datasets.shenzhen_RS.default + + +.. _ptbench.configs.datasets.folds: + +Cross-Validation Datasets +========================= + +We support cross-validation with precise preset folds. In this section, you +will find the configuration for the first fold (fold-0) for all supported +datasets. Nine other folds are available for every configuration (from 1 to +9), making up 10 folds per supported dataset. + + +.. autosummary:: + :toctree: api/configs/datasets + :template: config.rst + + ptbench.configs.datasets.hivtb.fold_0 + ptbench.configs.datasets.hivtb.fold_0_rgb + ptbench.configs.datasets.hivtb_RS.fold_0 + ptbench.configs.datasets.indian.fold_0 + ptbench.configs.datasets.indian.fold_0_rgb + ptbench.configs.datasets.indian_RS.fold_0 + ptbench.configs.datasets.mc_ch.fold_0 + ptbench.configs.datasets.mc_ch.fold_0_rgb + ptbench.configs.datasets.mc_ch_RS.fold_0 + ptbench.configs.datasets.mc_ch_in.fold_0 + ptbench.configs.datasets.mc_ch_in.fold_0_rgb + ptbench.configs.datasets.mc_ch_in_RS.fold_0 + ptbench.configs.datasets.montgomery.fold_0 + ptbench.configs.datasets.montgomery.fold_0_rgb + ptbench.configs.datasets.montgomery_RS.fold_0 + ptbench.configs.datasets.shenzhen.fold_0 + ptbench.configs.datasets.shenzhen.fold_0_rgb + ptbench.configs.datasets.shenzhen_RS.fold_0 + ptbench.configs.datasets.tbpoc.fold_0 + ptbench.configs.datasets.tbpoc.fold_0_rgb + ptbench.configs.datasets.tbpoc_RS.fold_0 + + +.. include:: links.rst diff --git a/doc/extras.inv b/doc/extras.inv new file mode 100644 index 0000000000000000000000000000000000000000..88973215f3227495564e345c34603dad42a9b532 --- /dev/null +++ b/doc/extras.inv @@ -0,0 +1,6 @@ +# Sphinx inventory version 2 +# Project: extras +# Version: stable +# The remainder of this file is compressed using zlib. +xÚEËÁ € лSti¼² * + PÒ~MØÞÞ߃è–îlYšƒ†f‡h5êÃWÙ¯i¡tóÌ}àÅNôäo°!¬%ò]B-4OÎŒ ã \ No newline at end of file diff --git a/doc/extras.txt b/doc/extras.txt new file mode 100644 index 0000000000000000000000000000000000000000..e827f8fa4af7ea94634d044dec1d282029babcc4 --- /dev/null +++ b/doc/extras.txt @@ -0,0 +1,5 @@ +# Sphinx inventory version 2 +# Project: extras +# Version: stable +# The remainder of this file is compressed using zlib. 
+torchvision.transforms py:module 1 https://pytorch.org/vision/stable/transforms.html - diff --git a/doc/img/direct_vs_indirect.png b/doc/img/direct_vs_indirect.png new file mode 100644 index 0000000000000000000000000000000000000000..786faa3591ff076bbc0b582d4aae49656cecbd0f Binary files /dev/null and b/doc/img/direct_vs_indirect.png differ diff --git a/doc/index.rst b/doc/index.rst index 552f6114604d40feaf3ce47cf3aab740e41a3289..591e5da1d5c8fa9819bf19b00d877bcfef461047 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -10,17 +10,49 @@ .. todolist:: -.. todo:: write introduction about ptbench here +Benchmarks of convolutional neural network (CNN) architectures applied to +Pulmonary Tuberculosis (TB) detection on chest X-rays (CXR). +Please use the BibTeX reference below to cite this work: -Documentation ------------- +.. code:: bibtex + + @INPROCEEDINGS{raposo_union_2022, + author = {Raposo, Geoffrey and Trajman, Anete and Anjos, Andr{\'{e}}}, + month = 11, + title = {Pulmonary Tuberculosis Screening from Radiological Signs on Chest X-Ray Images Using Deep Models}, + booktitle = {Union World Conference on Lung Health}, + year = {2022}, + date = {2022-11-01}, + organization = {The Union}, + } + + @TECHREPORT{Raposo_Idiap-Com-01-2021, + author = {Raposo, Geoffrey}, + keywords = {deep learning, generalization, Interpretability, transfer learning, Tuberculosis Detection}, + projects = {Idiap}, + month = {7}, + title = {Active tuberculosis detection from frontal chest X-ray images}, + type = {Idiap-Com}, + number = {Idiap-Com-01-2021}, + year = {2021}, + institution = {Idiap}, + url = {https://gitlab.idiap.ch/bob/bob.med.tb}, + pdf = {https://publidiap.idiap.ch/downloads/reports/2021/Raposo_Idiap-Com-01-2021.pdf} + } + + +User Guide +---------- .. toctree:: :maxdepth: 2 install usage + references + cli + config api diff --git a/doc/install.rst b/doc/install.rst index aebb216bc1cfbb772569817669db313994b097d5..f1f6772fdc2ed5b0118c3a74a454d57f4692ff4b 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -8,39 +8,210 @@ Installation ============== -.. todo:: fine-tune installation instructions for ptbench here +We support two installation modes, through pip_, or mamba_ (conda). -We support two installation modes, through pip_, or mamba_ (conda). +.. tab:: pip + + Stable, from PyPI: + + .. code:: sh + + pip install ptbench + + Latest beta, from GitLab package registry: + + .. code:: sh + + pip install --pre --index-url https://gitlab.idiap.ch/api/v4/groups/bob/-/packages/pypi/simple --extra-index-url https://pypi.org/simple ptbench + + .. tip:: + + To avoid long command-lines you may configure pip to define the indexes and + package search priorities as you like. + +.. tab:: mamba/conda -With pip -------- + Stable: -.. code-block:: sh + .. code:: sh - # stable, from PyPI: - $ pip install ptbench + mamba install -c https://www.idiap.ch/software/biosignal/conda -c conda-forge ptbench + + Latest beta: + + .. code:: sh + + mamba install -c https://www.idiap.ch/software/biosignal/conda/label/beta -c conda-forge ptbench + + +.. _ptbench.setup: + +Setup +----- + +A configuration file may be useful to set up global options that are often +reused. The location of the configuration file depends on the value of the +environment variable ``$XDG_CONFIG_HOME``, but defaults to +``~/.config/ptbench.toml``. You may edit this file using your preferred +editor. + +Here is an example configuration file that may be useful as a starting point: + +..
code:: toml + + [datadir] + indian = "/Users/myself/dbs/tbxpredict" + montgomery = "/Users/myself/dbs/montgomery-xrayset" + shenzhen = "/Users/myself/dbs/shenzhen" - # latest beta, from GitLab package registry: - $ pip install --pre --index-url https://gitlab.idiap.ch/api/v4/groups/biosignal/software/-/packages/pypi/simple --extra-index-url https://pypi.org/simple ptbench .. tip:: - To avoid long command-lines you may configure pip to define the indexes and - package search priorities as you like. + To get a list of valid data directories that can be configured, execute: + + .. code:: sh + + ptbench dataset list + + + You must procure and download datasets by yourself. The raw data is not + included in this package as we are not authorised to redistribute it. + + +.. _ptbench.setup.datasets: + +Supported Datasets +================== + +Here is a list of currently supported datasets in this package, alongside +notable properties. Each dataset name is linked to the location where +raw data can be downloaded. The list of images in each split is available +in the source code. + + +.. _ptbench.setup.datasets.tb: + +Tuberculosis datasets +~~~~~~~~~~~~~~~~~~~~~ + +The following datasets contain only the tuberculosis final diagnosis (0 or 1). +In addition to the splits presented in the following table, 10 randomly +generated folds (for cross-validation) are available for these datasets. + +.. list-table:: + + * - Dataset + - Reference + - H x W + - Samples + - Training + - Validation + - Test + * - Montgomery_ + - [MONTGOMERY-SHENZHEN-2014]_ + - 4020 x 4892 + - 138 + - 88 + - 22 + - 28 + * - Shenzhen_ + - [MONTGOMERY-SHENZHEN-2014]_ + - Varying + - 662 + - 422 + - 107 + - 133 + * - Indian_ + - [INDIAN-2013]_ + - Varying + - 155 + - 83 + - 20 + - 52 + + +.. _ptbench.setup.datasets.tb+signs: + +Tuberculosis + radiological findings dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following dataset contains both the tuberculosis final diagnosis (0 or 1) +and radiological findings. + +.. list-table:: + + * - Dataset + - Reference + - H x W + - Samples + - Train + - Test + * - PadChest_ + - [PADCHEST-2019]_ + - Varying + - 160'861 + - 160'861 + - 0 + + +.. _ptbench.setup.datasets.signs: + +Radiological findings datasets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following dataset contains only the radiological findings without any +information about tuberculosis. -With conda ---------- +.. note:: -.. code-block:: sh + NIH CXR14 labels for training and validation sets are the relabeled + versions done by the authors of the CheXNeXt study [CHEXNEXT-2018]_. - # stable: - $ mamba install -c https://www.idiap.ch/software/biosignal/software/conda -c conda-forge ptbench +.. list-table:: - # latest beta: - $ mamba install -c https://www.idiap.ch/software/biosignal/software/conda/label/beta -c conda-forge ptbench + * - Dataset + - Reference + - H x W + - Samples + - Training + - Validation + - Test + * - NIH_CXR14_re_ + - [NIH-CXR14-2017]_ + - 1024 x 1024 + - 109'041 + - 98'637 + - 6'350 + - 4'054 + + +.. _ptbench.setup.datasets.hiv-tb: + +HIV-Tuberculosis datasets +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following datasets contain only the tuberculosis final diagnosis (0 or 1) +and come from HIV-infected patients. 10 randomly generated folds (for +cross-validation) are available for these datasets. + +Please contact the authors of these datasets to have access to the data. + +..
list-table:: + + * - Dataset + - Reference + - H x W + - Samples + * - TB POC + - [TB-POC-2018]_ + - 2048 x 2500 + - 407 + * - HIV TB + - [HIV-TB-2019]_ + - 2048 x 2500 + - 243 .. include:: links.rst diff --git a/doc/links.rst b/doc/links.rst index 76efd229e86721d36cedbc6683d4d4c3c966550e..df1ef5ea53ee13e8d6c41c02bdaf2367b3940bd6 100644 --- a/doc/links.rst +++ b/doc/links.rst @@ -10,3 +10,11 @@ .. _python: http://www.python.org .. _pip: https://pip.pypa.io/en/stable/ .. _mamba: https://mamba.readthedocs.io/en/latest/index.html +.. _pytorch: https://pytorch.org + +.. Raw data websites +.. _montgomery: https://lhncbc.nlm.nih.gov/publication/pub9931 +.. _shenzhen: https://lhncbc.nlm.nih.gov/publication/pub9931 +.. _indian: https://sourceforge.net/projects/tbxpredict/ +.. _NIH_CXR14_re: https://nihcc.app.box.com/v/ChestXray-NIHCC +.. _PadChest: https://bimcv.cipf.es/bimcv-projects/padchest/ diff --git a/doc/nitpick-exceptions.txt b/doc/nitpick-exceptions.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f6fe3b338c959f4f3113d1280c57228351a8c0a --- /dev/null +++ b/doc/nitpick-exceptions.txt @@ -0,0 +1,2 @@ +py:class torch.nn.modules.loss._Loss +py:class Module diff --git a/doc/references.rst b/doc/references.rst new file mode 100644 index 0000000000000000000000000000000000000000..4897ee5bebf8a8937fa247d2f4fc8a199279ff56 --- /dev/null +++ b/doc/references.rst @@ -0,0 +1,61 @@ + +.. coding=utf-8 + +============ + References +============ + +.. [MONTGOMERY-SHENZHEN-2014] *Jaeger S, Candemir S, Antani S, Wáng YX, Lu PX, + Thoma G.*, **Two public chest X-ray datasets for computer-aided screening of + pulmonary diseases.**, Quant Imaging Med Surg. 2014;4(6):475–477. + https://dx.doi.org/10.3978%2Fj.issn.2223-4292.2014.11.20 + +.. [INDIAN-2013] https://sourceforge.net/projects/tbxpredict/ + +.. [PASA-2019] *Pasa, F., Golkov, V., Pfeiffer, F. et al.*, + **Efficient Deep Network Architectures for Fast Chest X-Ray Tuberculosis + Screening and Visualization.** Sci Rep 9, 6268 (2019). + https://doi.org/10.1038/s41598-019-42557-4 + +.. [SIMARD-2003] *P. Y. Simard, D. Steinkraus and J. C. Platt*, + **Best practices for convolutional neural networks applied to visual + document analysis**, Seventh International Conference on Document Analysis + and Recognition, 2003. Proceedings., Edinburgh, UK, 2003, pp. 958-963. + https://doi.org/10.1109/ICDAR.2003.1227801 + +.. [CHEXNEXT-2018] *Rajpurkar Pranav, Jeremy Irvin, Robyn L. Ball, Kaylie Zhu, + Brandon Yang, Hershel Mehta, Tony Duan, et al.*, **Deep Learning for Chest + Radiograph Diagnosis: A Retrospective Comparison of the CheXNeXt Algorithm + to Practicing Radiologists**. PLOS Medicine 15, nᵒ 11 (20 November 2018): + e1002686. https://doi.org/10.1371/journal.pmed.1002686 + +.. [NIH-CXR14-2017] *Xiaosong Wang et al.*, **ChestX-Ray8: Hospital-Scale + Chest X-Ray Database and Benchmarks on Weakly-Supervised Classification + and Localization of Common Thorax Diseases.** 2017 IEEE Conference on + Computer Vision and Pattern Recognition (CVPR). Honolulu, HI: IEEE, + July 2017, pp. 3462–3471. doi: 10.1109/CVPR.2017.369. + http://ieeexplore.ieee.org/document/8099852/ + +.. [PADCHEST-2019] *Aurelia Bustos et al.*, **PadChest: A large chest x-ray + image dataset with multi-label annotated reports**, Medical Image Analysis, + Volume 66, 2020, 101797, ISSN 1361-8415. doi: 10.1016/j.media.2020.101797.
+ https://www.sciencedirect.com/science/article/abs/pii/S1361841520301614 + +.. [GOUTTE-2005] *C. Goutte and E. Gaussier*, **A probabilistic interpretation + of precision, recall and F-score, with implication for evaluation**, + European conference on Advances in Information Retrieval Research, 2005. + https://doi.org/10.1007/978-3-540-31865-1_25 + +.. [TB-POC-2018] *Griesel, Rulan and Stewart, Annemie and van der Plas, Helen + and Sikhondze, Welile and Rangaka, Molebogeng X and Nicol, Mark P and + Kengne, Andre P and Mendelson, Marc and Maartens, Gary*, **Optimizing + Tuberculosis Diagnosis in Human Immunodeficiency Virus–Infected Inpatients + Meeting the Criteria of Seriously Ill in the World Health Organization + Algorithm**, Clinical Infectious Diseases, 2017. + https://doi.org/10.1093/cid/cix988 + +.. [HIV-TB-2019] *Van Hoving, D. J. et al.*, **Brief report: real-world + performance and interobserver agreement of urine lipoarabinomannan in + diagnosing HIV-Associated tuberculosis in an emergency center.**, + J. Acquir. Immune Defic. Syndr. 1999 81, e10–e14 (2019). diff --git a/doc/usage.rst b/doc/usage.rst index 87803559f00670a3b2c597e3eaf9029f160693b5..7dfba5213b48adf2b62fede52de5673a4900d9de 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -8,8 +8,84 @@ Usage ======= +This package supports a fully reproducible research experimentation cycle for +tuberculosis detection, covering the following activities. -.. todo:: write usage instructions for ptbench +.. figure:: img/direct_vs_indirect.png + + +.. _ptbench.usage.direct-detection: + +Direct detection +---------------- + +* Training: Images are fed to a Convolutional Neural Network (CNN), + that is trained to detect the presence of tuberculosis + automatically, via error back propagation. The objective of this phase is to + produce a CNN model. +* Inference (prediction): The CNN is used to generate TB predictions. +* Evaluation: Predictions are used to evaluate CNN performance against + provided annotations, and to generate measure files and score tables. Optimal + thresholds are also calculated. +* Comparison: Use prediction results to compare performance of multiple + systems. + + +.. _ptbench.usage.indirect-detection: + +Indirect detection +------------------ + +* Training (step 1): Images are fed to a Convolutional Neural Network (CNN), + that is trained to detect the presence of radiological signs + automatically, via error back propagation. The objective of this phase is to + produce a CNN model. +* Inference (prediction): The CNN is used to generate radiological signs + predictions. +* Conversion of the radiological signs predictions into a new dataset. +* Training (step 2): Radiological signs are fed to a shallow network, that is + trained to detect the presence of tuberculosis automatically, via error back + propagation. The objective of this phase is to produce a shallow model. +* Inference (prediction): The shallow model is used to generate TB predictions. +* Evaluation: Predictions are used to evaluate CNN performance against + provided annotations, and to generate measure files and score tables. +* Comparison: Use prediction results to compare performance of multiple + systems. + +We provide :ref:`command-line interfaces (CLI) <ptbench.cli>` that implement +each of the phases above. This interface is configurable using :ref:`exposed's +extensible configuration framework <exposed.config>`. In essence, +each command-line option may be provided as a variable with the same name in a +Python file.
Each file may combine any number of variables that are pertinent +to an application. + +.. tip:: + + For reproducibility, we recommend you stick to configuration files when + parameterizing our CLI. Notice that some of the options in the CLI interface + (e.g. ``--dataset``) cannot be passed via the actual command-line, as they + may require complex Python types that cannot be synthesized in a single + input parameter. + +We provide a number of :ref:`preset configuration files <ptbench.config>` that +can be used in one or more of the activities described in this section. Our +command-line framework allows you to refer to these preset configuration files +using special names (a.k.a. "resources") that procure and load these for you +automatically. + + +.. _ptbench.usage.commands: + +Commands +-------- + +.. toctree:: + :maxdepth: 2 + + usage/training + usage/evaluation + usage/predtojson + usage/aggregpred .. include:: links.rst diff --git a/doc/usage/aggregpred.rst b/doc/usage/aggregpred.rst new file mode 100644 index 0000000000000000000000000000000000000000..bcec1eef503e8017e0e242bf9695b4296e9c402c --- /dev/null +++ b/doc/usage/aggregpred.rst @@ -0,0 +1,24 @@ +.. Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +.. +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _ptbench.usage.aggregpred: + +======================================================= + Aggregate multiple prediction files into a single one +======================================================= + +This guide explains how to aggregate multiple prediction files into a single +one. It can be used when doing cross-validation to aggregate the predictions of +k different models before evaluating the aggregated predictions. We input +multiple prediction files (CSV files) and output a single one. + +Use the sub-command :ref:`aggregpred <ptbench.cli>` to aggregate your prediction +files together: + +.. code:: sh + + ptbench aggregpred -vv path/to/fold0/predictions.csv path/to/fold1/predictions.csv --output-folder=aggregpred + + +.. include:: ../links.rst diff --git a/doc/usage/evaluation.rst b/doc/usage/evaluation.rst new file mode 100644 index 0000000000000000000000000000000000000000..c712585d3447c89aa628f8f3ef9a376e37234d33 --- /dev/null +++ b/doc/usage/evaluation.rst @@ -0,0 +1,77 @@ +.. Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +.. +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _ptbench.usage.evaluation: + +========================== + Inference and Evaluation +========================== + +This guide explains how to run inference or a complete evaluation using +command-line tools. Inference produces the probability of TB presence for input +images, while evaluation will analyze such output against existing annotations +and produce performance figures. + + +Inference +--------- + +In inference (or prediction) mode, we input data, the trained model, and output +a CSV file containing the prediction outputs for every input image. + +To run inference, use the sub-command :ref:`predict <ptbench.cli>` to run +prediction on an existing dataset: + +.. code:: sh + + ptbench predict -vv <model> -w <path/to/model.pth> <dataset> + + +Replace ``<model>`` and ``<dataset>`` by the appropriate :ref:`configuration +files <ptbench.config>`. Replace ``<path/to/model.pth>`` with a path leading to +the pre-trained model. + +.. tip:: + + An option to generate grad-CAMs is available for the :py:mod:`DensenetRS + <ptbench.configs.models_datasets.densenet_rs>` model. To activate it, use + the ``--grad-cams`` argument. + +..
tip:: + + An option to generate a relevance analysis plot is available. To activate + it, use the ``--relevance-analysis`` argument. + + +Evaluation +---------- + +In evaluation, we input a dataset and predictions to generate performance +summaries that help the analysis of a trained model. Evaluation is done using the +:ref:`evaluate command <ptbench.cli>` followed by the model and the annotated +dataset configuration, and the path to the pretrained weights via the +``--weight`` argument. + +Use ``ptbench evaluate --help`` for more information. + +E.g., to run evaluation on predictions from the Montgomery set, do the following: + +.. code:: sh + + ptbench evaluate -vv montgomery -p /predictions/folder -o /eval/results/folder + + +Comparing Systems +----------------- + +To compare multiple systems together and generate combined plots and tables, +use the :ref:`compare command <ptbench.cli>`. Use ``--help`` for a quick +guide. + +.. code:: sh + + ptbench compare -vv A A/metrics.csv B B/metrics.csv --output-figure=plot.pdf --output-table=table.txt --threshold=0.5 + + +.. include:: ../links.rst diff --git a/doc/usage/predtojson.rst b/doc/usage/predtojson.rst new file mode 100644 index 0000000000000000000000000000000000000000..81e31d2fa8422f52e739146ccf8aa951469f6806 --- /dev/null +++ b/doc/usage/predtojson.rst @@ -0,0 +1,24 @@ +.. Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +.. +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _ptbench.usage.predtojson: + +======================================== + Converting predictions to JSON dataset +======================================== + +This guide explains how to convert radiological signs predictions from a model +into a JSON dataset. It can be used to create new versions of TB datasets with +the predicted radiological signs, so that a shallow model can be used. We input +predictions (CSV files) and output a ``dataset.json`` file. + +Use the sub-command :ref:`predtojson <ptbench.cli>` to create your JSON dataset +file: + +.. code:: sh + + ptbench predtojson -vv train train/predictions.csv test test/predictions.csv --output-folder=pred_to_json + + +.. include:: ../links.rst diff --git a/doc/usage/training.rst b/doc/usage/training.rst new file mode 100644 index 0000000000000000000000000000000000000000..67b3918cee7640bdee8f0277ec5390142163c21e --- /dev/null +++ b/doc/usage/training.rst @@ -0,0 +1,72 @@ +.. Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +.. +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _ptbench.usage.training: + +========== + Training +========== + +Convolutional Neural Network (CNN) +---------------------------------- + +To train a new CNN, use the command-line interface (CLI) application ``ptbench +train``, available on your prompt. To use this CLI, you must define the input +dataset that will be used to train the CNN, as well as the type of model that +will be trained. You may issue ``ptbench train --help`` for a help message +containing more detailed instructions. + +.. tip:: + + We strongly advise training with a GPU (using ``--device="cuda:0"``). + Depending on the available GPU memory you might have to adjust your batch + size (``--batch-size``). + + +Examples +======== + +To train Pasa CNN on the Montgomery dataset: + +.. code:: sh + + ptbench train -vv pasa montgomery --batch-size=4 --epochs=150 + +To train DensenetRS CNN on the NIH CXR14 dataset: + +..
code:: sh + + ptbench train -vv nih_cxr14 densenet_rs --batch-size=8 --epochs=10 + + +Logistic regressor or shallow network +------------------------------------- + +To train a logistic regressor or a shallow network, use the command-line +interface (CLI) application ``ptbench train``, available on your prompt. To use +this CLI, you must define the input dataset that will be used to train the +model, as well as the type of model that will be trained. +You may issue ``ptbench train --help`` for a help message containing more +detailed instructions. + +Examples +======== + +To train a logistic regressor using predictions from DensenetForRS on the +Montgomery dataset: + +.. code:: sh + + ptbench train -vv logistic_regression montgomery_rs --batch-size=4 --epochs=20 + + +To train Signs_to_TB using predictions from DensenetForRS on the Shenzhen +dataset: + +.. code:: sh + + ptbench train -vv signs_to_tb shenzhen_rs --batch-size=4 --epochs=20 + + +.. include:: ../links.rst diff --git a/pyproject.toml b/pyproject.toml index b940ab31b31770b6eb9705fb7c2309616f0ba57c..884c48b90109c7ff2d4607b2b82fe9f60e373588 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ [project] name = "ptbench" -version = "0.0.1b0" +version = "1.0.0b0" requires-python = ">=3.9" description = "Benchmarks for training and evaluating deep models for the detection of active Pulmonary Tuberculosis from Chest X-Ray imaging." dynamic = ["readme"] @@ -56,12 +56,11 @@ qa = ["pre-commit"] doc = [ "sphinx", "furo", - "auto-intersphinx", "sphinx-autodoc-typehints", - "sphinxcontrib-programoutput", "auto-intersphinx", "sphinx-copybutton", "sphinx-inline-tabs", + "sphinx-click", ] test = [ "pytest", diff --git a/src/ptbench/data/transforms.py b/src/ptbench/data/transforms.py index 2ded9b3c50a4883d1d0518f525901042c72c4471..91d56111ccb2e121aa3fc6bbd91c29ed7c0255da 100644 --- a/src/ptbench/data/transforms.py +++ b/src/ptbench/data/transforms.py @@ -5,11 +5,12 @@ """Image transformations for our pipelines. Differences between methods here and those from -:py:mod:`torchvision.transforms` is that these support multiple simultaneous -image inputs, which are required to feed segmentation networks (e.g. image and -labels or masks). We also take care of data augmentations, in which random -flipping and rotation needs to be applied across all input images, but color -jittering, for example, only on the input image. +:py:mod:`torchvision.transforms` is that these support multiple +simultaneous image inputs, which are required to feed segmentation +networks (e.g. image and labels or masks). We also take care of data +augmentations, in which random flipping and rotation needs to be applied +across all input images, but color jittering, for example, only on the +input image. """ import random @@ -55,10 +56,6 @@ class RemoveBlackBorders: class ElasticDeformation: """Elastic deformation of 2D image slightly adapted from [SIMARD-2003]_. - .. [SIMARD-2003] Simard, Steinkraus and Platt, "Best Practices for - Convolutional Neural Networks applied to Visual Document Analysis", in - Proc. of the International Conference on Document Analysis and - Recognition, 2003. 
Source: https://gist.github.com/oeway/2e3b989e0343f0884388ed7ed82eb3b0 """ diff --git a/src/ptbench/engine/evaluator.py b/src/ptbench/engine/evaluator.py index ad4bcc6965dd6afbc4c4993c6d98508afab5b96c..3c0348c0d8c65f3f209c8db329a46af4a5144a80 100644 --- a/src/ptbench/engine/evaluator.py +++ b/src/ptbench/engine/evaluator.py @@ -26,9 +26,11 @@ def eer_threshold(neg, pos) -> float: Parameters ---------- - neg: Negative scores + neg : typing.Iterable[float] + Negative scores - pos: Positive scores + pos : typing.Iterable[float] + Positive scores Returns: diff --git a/src/ptbench/engine/trainer.py b/src/ptbench/engine/trainer.py index 60fccaeba664ad42a87052a34fc7c958e14fd48f..2c6be7e146c8a627ec195b507f72e2d7d91d9caf 100644 --- a/src/ptbench/engine/trainer.py +++ b/src/ptbench/engine/trainer.py @@ -354,7 +354,7 @@ def checkpointer_process( Parameters ---------- - checkpointer : :py:class:`bob.med.tb.utils.checkpointer.Checkpointer` + checkpointer : :py:class:`ptbench.utils.checkpointer.Checkpointer` checkpointer implementation checkpoint_period : int @@ -517,7 +517,7 @@ def run( criterion : :py:class:`torch.nn.modules.loss._Loss` loss function - checkpointer : :py:class:`bob.med.tb.utils.checkpointer.Checkpointer` + checkpointer : :py:class:`ptbench.utils.checkpointer.Checkpointer` checkpointer implementation checkpoint_period : int diff --git a/src/ptbench/scripts/predtojson.py b/src/ptbench/scripts/predtojson.py index 75252cf23ea1ad3560270fbeab277af8577b4a74..ce9a23bec626312a80710942e16ba3690ed7fee1 100644 --- a/src/ptbench/scripts/predtojson.py +++ b/src/ptbench/scripts/predtojson.py @@ -60,7 +60,7 @@ def _load(data): epilog="""Examples: \b - 1. Convert predictions of radiological signs to a JSON dataset file_ + 1. Convert predictions of radiological signs to a JSON dataset file: .. code:: sh diff --git a/src/ptbench/scripts/train_analysis.py b/src/ptbench/scripts/train_analysis.py index 2359e0c5d9b0d0c087589c8924895bbbbf2f3abd..f5d0bc323daeff2d7454289b452cd0e4cd62f842 100644 --- a/src/ptbench/scripts/train_analysis.py +++ b/src/ptbench/scripts/train_analysis.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: GPL-3.0-or-later +from __future__ import annotations + import os import click @@ -19,15 +21,14 @@ def _loss_evolution(df): Parameters ---------- - df : pandas.DataFrame - dataframe containing the training logs + df : pandas.DataFrame + dataframe containing the training logs Returns ------- - figure : matplotlib.figure.Figure - figure to be displayed or saved to file + matplotlib.figure.Figure: Figure to be displayed or saved to file """ import numpy @@ -78,18 +79,17 @@ def _hardware_utilisation(df, const): Parameters ---------- - df : pandas.DataFrame - dataframe containing the training logs + df : pandas.DataFrame + dataframe containing the training logs - const : dict - training and hardware constants + const : dict + training and hardware constants Returns ------- - figure : matplotlib.figure.Figure - figure to be displayed or saved to file + matplotlib.figure.Figure: figure to be displayed or saved to file """ figure = plt.figure() @@ -133,14 +133,16 @@ def _hardware_utilisation(df, const): @click.command( - entry_point_group="bob.med.tb.config", + entry_point_group="ptbench.config", cls=ConfigCommand, epilog="""Examples: \b 1. Analyzes a training log and produces various plots: - $ bob binseg train-analysis -vv log.csv constants.csv + .. 
code:: sh + + ptbench train-analysis -vv log.csv constants.csv """, ) @@ -167,7 +169,7 @@ def train_analysis( output_pdf, **_, ): - """Analyze the training logs for loss evolution and resource + """Analyzes the training logs for loss evolution and resource utilisation.""" import pandas diff --git a/src/ptbench/utils/download.py b/src/ptbench/utils/download.py index 981165f51f084bd66483c0bc751718917f3f3374..9c780c7e3b2e7adc3e33fdec65018820152b55fe 100644 --- a/src/ptbench/utils/download.py +++ b/src/ptbench/utils/download.py @@ -27,7 +27,7 @@ def download_to_tempfile(url, progress=False): Returns ------- - f : tempfile.NamedTemporaryFile + f : :py:func:`tempfile.NamedTemporaryFile` A named temporary file that contains the downloaded URL """