diff --git a/src/ptbench/data/montgomery/default.py b/src/ptbench/data/montgomery/default.py index 35878b1a761f1b93a273c585207541921d23a8f8..bb57b9a7e8d95f9af40d36ac5a57349c8f514846 100644 --- a/src/ptbench/data/montgomery/default.py +++ b/src/ptbench/data/montgomery/default.py @@ -2,16 +2,27 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Montgomery datamodule for TB detection (default protocol) +"""Montgomery datamodule for TB detection (``default`` protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. +The standard digital image database for Tuberculosis was created by the National +Library of Medicine, Maryland, USA in collaboration with the Department of Health and Human +Services, Montgomery County, Maryland, USA. The Chest X-rays are from the county's Tuberculosis screening program. -This configuration: +* Database reference: [MONTGOMERY-SHENZHEN-2014]_ +* Original resolution (height x width or width x height): 4020x4892 px or 4892x4020 px +* This split: + + * Split reference: None + * Training samples: ?? of TB and healthy CXR + * Validation samples: ?? of TB and healthy CXR + * Test samples: ?? 
of TB and healthy CXR + +Data specifications: * Raw data input (on disk): * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px + * resolution: fixed to one of the cases above * Output image: @@ -23,8 +34,8 @@ This configuration: * Final specifications - * Grayscale (single channel), 8 bits - * Varying resolutions + * Grayscale, encoded as a single plane image, 8 bits + * Square (4020x4020 px) Protocol ``default``: diff --git a/src/ptbench/data/montgomery/fold_0.py b/src/ptbench/data/montgomery/fold_0.py index 4bfc478404555fa16af02469c24ab3aa1797fc1c..e50d2e302f1c6b529c862c529bb77cf20aef8a57 100644 --- a/src/ptbench/data/montgomery/fold_0.py +++ b/src/ptbench/data/montgomery/fold_0.py @@ -2,29 +2,9 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Montgomery datamodule for TB detection (default protocol) +"""Montgomery datamodule for TB detection (``fold 0`` protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_1.py b/src/ptbench/data/montgomery/fold_1.py index 0a74516fd365a34614419da75a8beadbee5f6dda..3698a9edfa614f980b9b2352d97c7329965d371d 100644 --- a/src/ptbench/data/montgomery/fold_1.py +++ b/src/ptbench/data/montgomery/fold_1.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_2.py b/src/ptbench/data/montgomery/fold_2.py index 386d30803c3cdc7a1c78622eaea412b6025ac10f..b2d7ac2cfd8def5627b56d5353740e9676e1d9cc 100644 --- a/src/ptbench/data/montgomery/fold_2.py +++ b/src/ptbench/data/montgomery/fold_2.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_3.py b/src/ptbench/data/montgomery/fold_3.py index 1bfaa888987ff7053dbfc9634f60829e2748a078..1c566e4f528e587cfd8a3bd882e2c73ea5a46aa6 100644 --- a/src/ptbench/data/montgomery/fold_3.py +++ b/src/ptbench/data/montgomery/fold_3.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_4.py b/src/ptbench/data/montgomery/fold_4.py index b955cb1503f41f3bd0b83376a61fc49fba985619..4b68bd538f71115a01bae0fce87742be6ab711a8 100644 --- a/src/ptbench/data/montgomery/fold_4.py +++ b/src/ptbench/data/montgomery/fold_4.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_5.py b/src/ptbench/data/montgomery/fold_5.py index 5604cfdd3496ac085200b5c06793a1ecdafb19f1..59891e8e1b5531b94fc996bfe25ef140ff39a83a 100644 --- a/src/ptbench/data/montgomery/fold_5.py +++ b/src/ptbench/data/montgomery/fold_5.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_6.py b/src/ptbench/data/montgomery/fold_6.py index 72178209acbe75c938c73d3f055d95aee71f74a8..e6c1d31a69ff20bbfd3ec4e53ba4eab0f9beec7f 100644 --- a/src/ptbench/data/montgomery/fold_6.py +++ b/src/ptbench/data/montgomery/fold_6.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_7.py b/src/ptbench/data/montgomery/fold_7.py index de895133b4b329ef6b8344c2228e1603f68d1315..44dd80512be61c32616188968a418b9963b41aed 100644 --- a/src/ptbench/data/montgomery/fold_7.py +++ b/src/ptbench/data/montgomery/fold_7.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_8.py b/src/ptbench/data/montgomery/fold_8.py index bc901fc5038a19fb80b465cdb18e10647e3a03ee..fd7edde69259023fa36ff05027fe1f0ad19d6661 100644 --- a/src/ptbench/data/montgomery/fold_8.py +++ b/src/ptbench/data/montgomery/fold_8.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/montgomery/fold_9.py b/src/ptbench/data/montgomery/fold_9.py index 758ae3403a2944518fc17219a31ff71175e21fae..91228362f8c376d9ac9186f6675d80295e848f13 100644 --- a/src/ptbench/data/montgomery/fold_9.py +++ b/src/ptbench/data/montgomery/fold_9.py @@ -4,27 +4,7 @@ """Montgomery datamodule for TB detection (default protocol) -* See :py:mod:`ptbench.data.montgomery` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images 8 bit grayscale - * resolution: 4020 x 4892 px or 4892 x 4020 px - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications - - * Grayscale (single channel), 8 bits - * Varying resolutions +See :py:mod:`ptbench.data.montgomery.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py index ba8a2b5714acbbe62e7e0f75c1fe8ec040721459..a163b9bc6290f53e611d214bdfa03e0cf93eb492 100644 --- a/src/ptbench/data/shenzhen/default.py +++ b/src/ptbench/data/shenzhen/default.py @@ -2,33 +2,42 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Shenzhen datamodule for computer-aided diagnosis (default protocol) +"""Shenzhen datamodule for computer-aided diagnosis (``default`` protocol) -See :py:mod:`ptbench.data.shenzhen` for more database details. +The standard digital image database for Tuberculosis was created by the National +Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s +Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from +out-patient clinics, and were captured as part of the daily routine using +Philips DR Digital Diagnost systems. 
-This configuration: +* Database reference: [MONTGOMERY-SHENZHEN-2014]_ +* Original resolution (height x width or width x height): 3001 x 3001 px or less +* This split: -* Raw data input (on disk): + * Split reference: None + * Training samples: 64% of TB and healthy CXR (including labels) + * Validation samples: 16% of TB and healthy CXR (including labels) + * Test samples: 20% of TB and healthy CXR (including labels) + +Data specifications: - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: +* Raw data input (on disk): - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels + * PNG images (grayscale, encoded as RGB images with an "inverted" intensity scale) + * Variable width and height * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * grayscale, encoded as RGB image - * varying resolutions - + * Grayscale, encoded as a single plane image, 8 bits + * Square, with varying resolutions, depending on the input image """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_0.py b/src/ptbench/data/shenzhen/fold_0.py index d5c3e44737fe7a090b3b7ae33c7cc49c0843edf3..b505974491eea26e1da8931022eb168a42d57a0f 100644 --- a/src/ptbench/data/shenzhen/fold_0.py +++ b/src/ptbench/data/shenzhen/fold_0.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 0) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_1.py b/src/ptbench/data/shenzhen/fold_1.py index 5c7a294aa86109e3700b99ff953237fc8afab18e..1041c3e4ef6d14942dadd4c680dc10fee0cfd17c 100644 --- a/src/ptbench/data/shenzhen/fold_1.py +++ b/src/ptbench/data/shenzhen/fold_1.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 1) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_2.py b/src/ptbench/data/shenzhen/fold_2.py index 31480af5e70a72db203795d49631dd13c7df290a..5026116a9cd75ac406f334682b38ce760104444d 100644 --- a/src/ptbench/data/shenzhen/fold_2.py +++ b/src/ptbench/data/shenzhen/fold_2.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 2) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_3.py b/src/ptbench/data/shenzhen/fold_3.py index a1881ad69e928bc6dfaccec4db9c30b8da5027da..16c00157c5fa9fda38afc16614b75f2e766c33d5 100644 --- a/src/ptbench/data/shenzhen/fold_3.py +++ b/src/ptbench/data/shenzhen/fold_3.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 3) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_4.py b/src/ptbench/data/shenzhen/fold_4.py index f86eb6659b64ac24204f7a5a57edccb533e3379a..c0b0fdacdf90fdce168988057219923af73ad6a0 100644 --- a/src/ptbench/data/shenzhen/fold_4.py +++ b/src/ptbench/data/shenzhen/fold_4.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 4) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_5.py b/src/ptbench/data/shenzhen/fold_5.py index 16ea44f7809b0e595b9fb62d2496eaaf04b585ba..0397955e25d1077af68b825b5ecbf0d8974499db 100644 --- a/src/ptbench/data/shenzhen/fold_5.py +++ b/src/ptbench/data/shenzhen/fold_5.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 5) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_6.py b/src/ptbench/data/shenzhen/fold_6.py index caecaa0dd41e1c20a2618640c56f781db7937e39..145685ea96be63501a8afd771518b4b2f3f65c49 100644 --- a/src/ptbench/data/shenzhen/fold_6.py +++ b/src/ptbench/data/shenzhen/fold_6.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 6) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_7.py b/src/ptbench/data/shenzhen/fold_7.py index ee34486cf3cad7856dc0055d02d87f74b68a8b13..5b8d74034a18e2637a9a193557571521722e93bc 100644 --- a/src/ptbench/data/shenzhen/fold_7.py +++ b/src/ptbench/data/shenzhen/fold_7.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 7) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_8.py b/src/ptbench/data/shenzhen/fold_8.py index 6c81ca7062e3b55441f3052b04e7ae7d317fc9d2..e9ce1a2f408543bc93d8f116a2f8834ab79c989f 100644 --- a/src/ptbench/data/shenzhen/fold_8.py +++ b/src/ptbench/data/shenzhen/fold_8.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 8) -See :py:mod:`ptbench.data.shenzhen` for more database details. 
- -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources diff --git a/src/ptbench/data/shenzhen/fold_9.py b/src/ptbench/data/shenzhen/fold_9.py index 546e449e4288c262886c3570f4e3a09735f923ad..6da8dd3d7a4260e7b9a478baea4b2848383f8459 100644 --- a/src/ptbench/data/shenzhen/fold_9.py +++ b/src/ptbench/data/shenzhen/fold_9.py @@ -4,31 +4,7 @@ """Shenzhen datamodule for computer-aided diagnosis (fold 9) -See :py:mod:`ptbench.data.shenzhen` for more database details. - -This configuration: - -* Raw data input (on disk): - - * PNG images (grayscale, encoded as RGB images) - * Variable width and height: - - * widths: from 1130 to 3001 pixels - * heights: from 948 to 3001 pixels - -* Output image: - - * Transforms: - - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image - - * Final specifications: - - * grayscale, encoded as RGB image - * varying resolutions - +See :py:mod:`ptbench.data.shenzhen.default` for input/output details. """ import importlib.resources