diff --git a/src/ptbench/data/indian/datamodule.py b/src/ptbench/data/indian/datamodule.py index 34fb4b5a90fc506cc2096a86526da4bacc7dcaba..6316f264e859c091232a61fd22104fc95b1fe2e6 100644 --- a/src/ptbench/data/indian/datamodule.py +++ b/src/ptbench/data/indian/datamodule.py @@ -25,14 +25,14 @@ class DataModule(CachingDataModule): in computer-aided diagnosis of pulmonary diseases with a special focus on pulmonary tuberculosis (TB). - * Original resolution (height x width or width x height): more than 1024 x 1024 + * Original images PNG, 8-bit grayscale, 1024 x 1024 pixels * Split reference: [INDIAN-2013]_ with 20% of train set for the validation set Data specifications: * Raw data input (on disk): - * PNG images (grayscale, encoded as RGB images with "inverted" grayscale scale) + * PNG RGB 8-bit depth images with "inverted" grayscale scale * Variable width and height * Output image: @@ -41,12 +41,14 @@ class DataModule(CachingDataModule): * Load raw PNG with :py:mod:`PIL` * Remove black borders + * Convert to torch tensor * Torch center cropping to get square image * Final specifications: - * Grayscale, encoded as a single plane image, 8 bits - * Square, with varying resolutions, depending on the input image + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square, with varying resolutions, depending on the input raw image + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/montgomery/datamodule.py b/src/ptbench/data/montgomery/datamodule.py index e04efebd6b72d205c2ee5c1e2f23559d7a01599d..12ead6e81a8e2673cd8b388d52bf3dd7ef9d7849 100644 --- a/src/ptbench/data/montgomery/datamodule.py +++ b/src/ptbench/data/montgomery/datamodule.py @@ -120,12 +120,14 @@ class DataModule(CachingDataModule): * Load raw PNG with :py:mod:`PIL` * Remove black borders + * Convert to torch tensor * Torch center cropping to get square image * Final specifications - * Grayscale, encoded as a single plane 
image, 8 bits - * Square (4020x4020 px) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square at 4020 x 4020 pixels + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/nih_cxr14/datamodule.py b/src/ptbench/data/nih_cxr14/datamodule.py index 5452640949ecc998ea4ee026616a894b1a7be7e9..b5bf77a28254a083455a35484df5248655ba34f7 100644 --- a/src/ptbench/data/nih_cxr14/datamodule.py +++ b/src/ptbench/data/nih_cxr14/datamodule.py @@ -127,7 +127,11 @@ class DataModule(CachingDataModule): CheXNeXt study. * Reference: [NIH-CXR14-2017]_ - * Original resolution (height x width): 1024 x 1024 + * Raw data input (on disk): + + * PNG RGB 8-bit depth images + * Resolution: 1024 x 1024 pixels + * Labels: [CHEXNEXT-2018]_ * Split reference: [CHEXNEXT-2018]_ * Protocol ``default``: @@ -141,11 +145,26 @@ class DataModule(CachingDataModule): * Transforms: * Load raw PNG with :py:mod:`PIL` - - * Final specifications - - * RGB, encoded as a 3-plane image, 8 bits - * Square (1024x1024 px) + * Convert to torch tensor + + * Final specifications: + + * RGB, encoded as a 3-plane tensor, 32-bit floats, square (1024x1024 px) + * Labels in order: + * cardiomegaly + * emphysema + * effusion + * hernia + * infiltration + * mass + * nodule + * atelectasis + * pneumothorax + * pleural thickening + * pneumonia + * fibrosis + * edema + * consolidation """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/shenzhen/datamodule.py b/src/ptbench/data/shenzhen/datamodule.py index 8307396df435433f220df0e07cc1f3da24b8e853..7cf1833b27ca1e79382cb7e45fa41b5b4ee27292 100644 --- a/src/ptbench/data/shenzhen/datamodule.py +++ b/src/ptbench/data/shenzhen/datamodule.py @@ -112,14 +112,14 @@ class DataModule(CachingDataModule): Philips DR Digital Diagnose systems. 
* Database reference: [MONTGOMERY-SHENZHEN-2014]_ - * Original resolution (height x width or width x height): 3000 x 3000 or less Data specifications: * Raw data input (on disk): - * PNG images (grayscale, encoded as RGB images with "inverted" grayscale scale) - * Variable width and height + * PNG 8-bit RGB images (grayscale, but encoded as RGB images with + "inverted" grayscale scale requiring special treatment). + * Variable width and height of 3000 x 3000 pixels or less * Output image: @@ -131,8 +131,9 @@ class DataModule(CachingDataModule): * Final specifications: - * Grayscale, encoded as a single plane image, 8 bits - * Square, with varying resolutions, depending on the input image + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square with varying resolutions, depending on the input image + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/tbx11k/datamodule.py b/src/ptbench/data/tbx11k/datamodule.py index 3420b6de672a05400330d1575ed3593579666f99..45785ddbd679a241d90c9c48cfdd526315f48fbc 100644 --- a/src/ptbench/data/tbx11k/datamodule.py +++ b/src/ptbench/data/tbx11k/datamodule.py @@ -246,17 +246,17 @@ class DataModule(CachingDataModule): active TB cases (total samples = 8369): - ``train`` dataset samples: - - Healthy: 4864 + - Healthy, Sick or Latent TB: 4864 - Active TB only: 377 - Total: 5241 - ``validation`` dataset samples: + - Healthy, Sick or Latent TB: 1239 - Active TB only: 96 - - Healthy: 1239 - Total: 1335 - ``test`` dataset samples: - - Healthy: 1636 + - Healthy, Sick or Latent TB: 1636 - Active TB only: 157 - Total: 1793 @@ -265,8 +265,7 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG images 8 bits RGB - * Resolution: 512x512 pixels + * PNG images 8 bits RGB, 512 x 512 pixels * Output image: @@ -276,8 +275,10 @@ class DataModule(CachingDataModule): * Final specifications: - * RGB, encoded as a 3-plane image, 8 bits - * Square 
(512x512 px) + * RGB, encoded as a 3-plane tensor using 32-bit floats, square + (512x512 pixels) + * Labels: 0 (healthy, latent TB or sick but no TB depending on the + protocol), 1 (active tuberculosis) """ def __init__(self, split_filename: str):