From b8f0c8cd610c4289fa603daa59898b55784847fc Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Thu, 20 Jul 2023 23:34:12 +0200 Subject: [PATCH] [data.montgomery/shenzhen] Simplify documentation maintenance --- src/ptbench/data/montgomery/default.py | 23 +++++++++++---- src/ptbench/data/montgomery/fold_0.py | 24 ++------------- src/ptbench/data/montgomery/fold_1.py | 22 +------------- src/ptbench/data/montgomery/fold_2.py | 22 +------------- src/ptbench/data/montgomery/fold_3.py | 22 +------------- src/ptbench/data/montgomery/fold_4.py | 22 +------------- src/ptbench/data/montgomery/fold_5.py | 22 +------------- src/ptbench/data/montgomery/fold_6.py | 22 +------------- src/ptbench/data/montgomery/fold_7.py | 22 +------------- src/ptbench/data/montgomery/fold_8.py | 22 +------------- src/ptbench/data/montgomery/fold_9.py | 22 +------------- src/ptbench/data/shenzhen/default.py | 41 ++++++++++++++++---------- src/ptbench/data/shenzhen/fold_0.py | 26 +--------------- src/ptbench/data/shenzhen/fold_1.py | 26 +--------------- src/ptbench/data/shenzhen/fold_2.py | 26 +--------------- src/ptbench/data/shenzhen/fold_3.py | 26 +--------------- src/ptbench/data/shenzhen/fold_4.py | 26 +--------------- src/ptbench/data/shenzhen/fold_5.py | 26 +--------------- src/ptbench/data/shenzhen/fold_6.py | 26 +--------------- src/ptbench/data/shenzhen/fold_7.py | 26 +--------------- src/ptbench/data/shenzhen/fold_8.py | 26 +--------------- src/ptbench/data/shenzhen/fold_9.py | 26 +--------------- 22 files changed, 63 insertions(+), 483 deletions(-) diff --git a/src/ptbench/data/montgomery/default.py b/src/ptbench/data/montgomery/default.py index 35878b1a..bb57b9a7 100644 --- a/src/ptbench/data/montgomery/default.py +++ b/src/ptbench/data/montgomery/default.py @@ -2,16 +2,27 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Montgomery datamodule for TB detection (default protocol) +"""Montgomery datamodule for TB detection (``default`` protocol) -* See 
:py:mod:`ptbench.data.montgomery` for more database details. +The standard digital image database for Tuberculosis was created by the National +Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s +Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from the tuberculosis control program of the Department of Health and Human Services of Montgomery County, Maryland, USA. -This configuration: +* Database reference: [MONTGOMERY-SHENZHEN-2014]_ +* Original resolution (height x width or width x height): 4020x4892 px or 4892x4020 px +* This split: + + * Split reference: None + * Training samples: ?? of TB and healthy CXR + * Validation samples: ?? of TB and healthy CXR + * Test samples: ?? of TB and healthy CXR + +Data specifications: * Raw data input (on disk): * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px + * resolution: fixed to one of the cases above * Output image: @@ -23,8 +34,8 @@ This configuration: * Final specifications - * Grayscale (single channel), 8 bits - * Varying resolutions + * Grayscale, encoded as a single plane image, 8 bits + * Square (4020x4020 px) Protocol ``default``: diff --git a/src/ptbench/data/montgomery/fold_0.py b/src/ptbench/data/montgomery/fold_0.py index 4bfc4784..e50d2e30 100644 --- a/src/ptbench/data/montgomery/fold_0.py +++ b/src/ptbench/data/montgomery/fold_0.py @@ -2,29 +2,9 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Montgomery datamodule for TB detection (default protocol) +"""Montgomery datamodule for TB detection (``fold 0`` protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. 
""" import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_1.py b/src/ptbench/data/montgomery/fold_1.py index 0a74516f..3698a9ed 100644 --- a/src/ptbench/data/montgomery/fold_1.py +++ b/src/ptbench/data/montgomery/fold_1.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_2.py b/src/ptbench/data/montgomery/fold_2.py index 386d3080..b2d7ac2c 100644 --- a/src/ptbench/data/montgomery/fold_2.py +++ b/src/ptbench/data/montgomery/fold_2.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. 
""" import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_3.py b/src/ptbench/data/montgomery/fold_3.py index 1bfaa888..1c566e4f 100644 --- a/src/ptbench/data/montgomery/fold_3.py +++ b/src/ptbench/data/montgomery/fold_3.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_4.py b/src/ptbench/data/montgomery/fold_4.py index b955cb15..4b68bd53 100644 --- a/src/ptbench/data/montgomery/fold_4.py +++ b/src/ptbench/data/montgomery/fold_4.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. 
""" import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_5.py b/src/ptbench/data/montgomery/fold_5.py index 5604cfdd..59891e8e 100644 --- a/src/ptbench/data/montgomery/fold_5.py +++ b/src/ptbench/data/montgomery/fold_5.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_6.py b/src/ptbench/data/montgomery/fold_6.py index 72178209..e6c1d31a 100644 --- a/src/ptbench/data/montgomery/fold_6.py +++ b/src/ptbench/data/montgomery/fold_6.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. 
""" import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_7.py b/src/ptbench/data/montgomery/fold_7.py index de895133..44dd8051 100644 --- a/src/ptbench/data/montgomery/fold_7.py +++ b/src/ptbench/data/montgomery/fold_7.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_8.py b/src/ptbench/data/montgomery/fold_8.py index bc901fc5..fd7edde6 100644 --- a/src/ptbench/data/montgomery/fold_8.py +++ b/src/ptbench/data/montgomery/fold_8.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. 
""" import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_9.py b/src/ptbench/data/montgomery/fold_9.py index 758ae340..91228362 100644 --- a/src/ptbench/data/montgomery/fold_9.py +++ b/src/ptbench/data/montgomery/fold_9.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py index ba8a2b57..a163b9bc 100644 --- a/src/ptbench/data/shenzhen/default.py +++ b/src/ptbench/data/shenzhen/default.py @@ -2,33 +2,42 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Shenzhen datamodule for computer-aided diagnosis (default protocol) +"""Shenzhen datamodule for computer-aided diagnosis (``default`` protocol) -See :py:mod:`ptbench.data.shenzhen` for more database details. +The standard digital image database for Tuberculosis was created by the National +Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s +Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from +out-patient clinics, and were captured as part of the daily routine using +Philips DR Digital Diagnose systems. 
-This configuration: +* Database reference: [MONTGOMERY-SHENZHEN-2014]_ +* Original resolution (height x width or width x height): 3000 x 3000 px or less +* This split: -* Raw data input (on disk): + * Split reference: None + * Training samples: 64% of TB and healthy CXR (including labels) + * Validation samples: 16% of TB and healthy CXR (including labels) + * Test samples: 20% of TB and healthy CXR (including labels) + +Data specifications: - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: +* Raw data input (on disk): - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels + * PNG images (grayscale, encoded as RGB images with an "inverted" grayscale) + * Variable width and height * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * grayscale, encoded as RGB image - * varying resolutions - + * Grayscale, encoded as a single plane image, 8 bits + * Square, with varying resolutions, depending on the input image """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_0.py b/src/ptbench/data/shenzhen/fold_0.py index d5c3e447..b5059744 100644 --- a/src/ptbench/data/shenzhen/fold_0.py +++ b/src/ptbench/data/shenzhen/fold_0.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 0) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_1.py b/src/ptbench/data/shenzhen/fold_1.py index 5c7a294a..1041c3e4 100644 --- a/src/ptbench/data/shenzhen/fold_1.py +++ b/src/ptbench/data/shenzhen/fold_1.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 1) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_2.py b/src/ptbench/data/shenzhen/fold_2.py index 31480af5..5026116a 100644 --- a/src/ptbench/data/shenzhen/fold_2.py +++ b/src/ptbench/data/shenzhen/fold_2.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 2) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_3.py b/src/ptbench/data/shenzhen/fold_3.py index a1881ad6..16c00157 100644 --- a/src/ptbench/data/shenzhen/fold_3.py +++ b/src/ptbench/data/shenzhen/fold_3.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 3) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_4.py b/src/ptbench/data/shenzhen/fold_4.py index f86eb665..c0b0fdac 100644 --- a/src/ptbench/data/shenzhen/fold_4.py +++ b/src/ptbench/data/shenzhen/fold_4.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 4) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_5.py b/src/ptbench/data/shenzhen/fold_5.py index 16ea44f7..0397955e 100644 --- a/src/ptbench/data/shenzhen/fold_5.py +++ b/src/ptbench/data/shenzhen/fold_5.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 5) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_6.py b/src/ptbench/data/shenzhen/fold_6.py index caecaa0d..145685ea 100644 --- a/src/ptbench/data/shenzhen/fold_6.py +++ b/src/ptbench/data/shenzhen/fold_6.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 6) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_7.py b/src/ptbench/data/shenzhen/fold_7.py index ee34486c..5b8d7403 100644 --- a/src/ptbench/data/shenzhen/fold_7.py +++ b/src/ptbench/data/shenzhen/fold_7.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 7) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_8.py b/src/ptbench/data/shenzhen/fold_8.py index 6c81ca70..e9ce1a2f 100644 --- a/src/ptbench/data/shenzhen/fold_8.py +++ b/src/ptbench/data/shenzhen/fold_8.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 8) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_9.py b/src/ptbench/data/shenzhen/fold_9.py index 546e449e..6da8dd3d 100644 --- a/src/ptbench/data/shenzhen/fold_9.py +++ b/src/ptbench/data/shenzhen/fold_9.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 9) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources -- GitLab