From 11b8dd3d56a95cabbf18b0ba86ed064043d57d8d Mon Sep 17 00:00:00 2001
From: dcarron <daniel.carron@idiap.ch>
Date: Thu, 13 Jun 2024 17:16:52 +0200
Subject: [PATCH] [segmentation] Add avdrive database

---
 .../config/data/avdrive/__init__.py           |   0
 .../config/data/avdrive/datamodule.py         | 103 +++++++++
 .../config/data/avdrive/default.json          | 206 ++++++++++++++++++
 .../config/data/avdrive/default.py            |  13 ++
 .../config/data/avdrive/second-annotator.json | 104 +++++++++
 5 files changed, 426 insertions(+)
 create mode 100644 src/mednet/libs/segmentation/config/data/avdrive/__init__.py
 create mode 100644 src/mednet/libs/segmentation/config/data/avdrive/datamodule.py
 create mode 100644 src/mednet/libs/segmentation/config/data/avdrive/default.json
 create mode 100644 src/mednet/libs/segmentation/config/data/avdrive/default.py
 create mode 100644 src/mednet/libs/segmentation/config/data/avdrive/second-annotator.json

diff --git a/src/mednet/libs/segmentation/config/data/avdrive/__init__.py b/src/mednet/libs/segmentation/config/data/avdrive/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/mednet/libs/segmentation/config/data/avdrive/datamodule.py b/src/mednet/libs/segmentation/config/data/avdrive/datamodule.py
new file mode 100644
index 00000000..be6adce1
--- /dev/null
+++ b/src/mednet/libs/segmentation/config/data/avdrive/datamodule.py
@@ -0,0 +1,103 @@
+# SPDX-FileCopyrightText: Copyright © 2024 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+"""AV_DRIVE dataset for Vessel Segmentation."""
+
+import os
+import pathlib
+
+import PIL.Image
+from mednet.libs.common.data.datamodule import CachingDataModule
+from mednet.libs.common.data.split import make_split
+from mednet.libs.common.data.typing import Sample
+from mednet.libs.common.models.transforms import crop_image_to_mask
+from mednet.libs.common.utils.rc import load_rc
+from mednet.libs.segmentation.data.typing import (
+    SegmentationRawDataLoader as _SegmentationRawDataLoader,
+)
+from torchvision import tv_tensors
+from torchvision.transforms.functional import to_tensor
+
+CONFIGURATION_KEY_DATADIR = f"datadir.{__name__.rsplit('.', 2)[-2]}"
+"""Configuration-file key under which the root directory of this database is
+looked up; its suffix is the second-to-last component of this module's name."""
+
+
+class SegmentationRawDataLoader(_SegmentationRawDataLoader):
+    """A specialized raw-data-loader for the avdrive dataset."""
+
+    drive_datadir: pathlib.Path
+    """Base directory against which image and mask paths are resolved."""
+
+    av_drive_datadir: pathlib.Path
+    """Base directory against which target paths are resolved."""
+
+    def __init__(self):
+        rc, cwd = load_rc(), os.path.realpath(os.curdir)
+        # A "datadir.drive" entry wins; otherwise keep the previously-used key.
+        fallback = rc.get(CONFIGURATION_KEY_DATADIR, cwd)
+        self.drive_datadir = pathlib.Path(rc.get("datadir.drive", fallback))
+        self.av_drive_datadir = pathlib.Path(rc.get("datadir.avdrive", cwd))
+
+    @staticmethod
+    def _open_as_tensor(path: pathlib.Path, **convert_kwargs):
+        """Return ``path`` converted per ``convert_kwargs`` as a tensor."""
+        with PIL.Image.open(path) as img:  # closes the file handle on exit
+            return to_tensor(img.convert(**convert_kwargs))
+
+    def sample(self, sample: tuple[str, str, str]) -> Sample:
+        """Load a single image sample from the disk.
+
+        Parameters
+        ----------
+        sample
+            Path suffixes of the image, target and mask, in this order.  Image
+            and mask are looked up under ``drive_datadir``, the target under
+            ``av_drive_datadir``.
+
+        Returns
+        -------
+            The image tensor and a dictionary with ``target``, ``mask`` and
+            ``name`` (the image path suffix).  Image, target and mask are each
+            passed through ``crop_image_to_mask`` with the loaded mask.
+        """
+        binary = dict(mode="1", dither=None)
+        image = self._open_as_tensor(self.drive_datadir / sample[0], mode="RGB")
+        target = self._open_as_tensor(self.av_drive_datadir / sample[1], **binary)
+        mask = self._open_as_tensor(self.drive_datadir / sample[2], **binary)
+
+        tensor = tv_tensors.Image(crop_image_to_mask(image, mask))
+        target = tv_tensors.Image(crop_image_to_mask(target, mask))
+        mask = tv_tensors.Mask(crop_image_to_mask(mask, mask))
+
+        return tensor, dict(target=target, mask=mask, name=sample[0])  # type: ignore[arg-type]
+
+
+class DataModule(CachingDataModule):
+    """AV_DRIVE dataset for Vessel Segmentation.
+
+    The DRIVE database has been established to enable comparative studies on
+    segmentation of blood vessels in retinal images.
+
+    * Reference: [DRIVE-2004]_
+    * Original resolution (height x width): 584 x 565
+    * Split reference: [DRIVE-2004]_
+    * Protocol ``default``: 20 training samples (including labels and masks)
+      and 20 test samples (including labels from annotator 1 and masks)
+
+    Parameters
+    ----------
+    split_filename
+        File name of the .json split to load (its stem is used as split name).
+    """
+
+    def __init__(self, split_filename: str):
+        assert __package__ is not None
+        split = make_split(__package__, split_filename)
+        loader = SegmentationRawDataLoader()
+        super().__init__(
+            database_split=split,
+            raw_data_loader=loader,
+            database_name=__package__.rsplit(".", 1)[1],
+            split_name=pathlib.Path(split_filename).stem,
+        )
diff --git a/src/mednet/libs/segmentation/config/data/avdrive/default.json b/src/mednet/libs/segmentation/config/data/avdrive/default.json
new file mode 100644
index 00000000..6707e6ed
--- /dev/null
+++ b/src/mednet/libs/segmentation/config/data/avdrive/default.json
@@ -0,0 +1,206 @@
+{
+ "train": [
+  [
+   "training/images/21_training.tif",
+   "training/1st_manual/21_manual1.gif",
+   "training/mask/21_training_mask.gif"
+  ],
+  [
+   "training/images/22_training.tif",
+   "training/1st_manual/22_manual1.gif",
+   "training/mask/22_training_mask.gif"
+  ],
+  [
+   "training/images/23_training.tif",
+   "training/1st_manual/23_manual1.gif",
+   "training/mask/23_training_mask.gif"
+  ],
+  [
+   "training/images/24_training.tif",
+   "training/1st_manual/24_manual1.gif",
+   "training/mask/24_training_mask.gif"
+  ],
+  [
+   "training/images/25_training.tif",
+   "training/1st_manual/25_manual1.gif",
+   "training/mask/25_training_mask.gif"
+  ],
+  [
+   "training/images/26_training.tif",
+   "training/1st_manual/26_manual1.gif",
+   "training/mask/26_training_mask.gif"
+  ],
+  [
+   "training/images/27_training.tif",
+   "training/1st_manual/27_manual1.gif",
+   "training/mask/27_training_mask.gif"
+  ],
+  [
+   "training/images/28_training.tif",
+   "training/1st_manual/28_manual1.gif",
+   "training/mask/28_training_mask.gif"
+  ],
+  [
+   "training/images/29_training.tif",
+   "training/1st_manual/29_manual1.gif",
+   "training/mask/29_training_mask.gif"
+  ],
+  [
+   "training/images/30_training.tif",
+   "training/1st_manual/30_manual1.gif",
+   "training/mask/30_training_mask.gif"
+  ],
+  [
+   "training/images/31_training.tif",
+   "training/1st_manual/31_manual1.gif",
+   "training/mask/31_training_mask.gif"
+  ],
+  [
+   "training/images/32_training.tif",
+   "training/1st_manual/32_manual1.gif",
+   "training/mask/32_training_mask.gif"
+  ],
+  [
+   "training/images/33_training.tif",
+   "training/1st_manual/33_manual1.gif",
+   "training/mask/33_training_mask.gif"
+  ],
+  [
+   "training/images/34_training.tif",
+   "training/1st_manual/34_manual1.gif",
+   "training/mask/34_training_mask.gif"
+  ],
+  [
+   "training/images/35_training.tif",
+   "training/1st_manual/35_manual1.gif",
+   "training/mask/35_training_mask.gif"
+  ],
+  [
+   "training/images/36_training.tif",
+   "training/1st_manual/36_manual1.gif",
+   "training/mask/36_training_mask.gif"
+  ],
+  [
+   "training/images/37_training.tif",
+   "training/1st_manual/37_manual1.gif",
+   "training/mask/37_training_mask.gif"
+  ],
+  [
+   "training/images/38_training.tif",
+   "training/1st_manual/38_manual1.gif",
+   "training/mask/38_training_mask.gif"
+  ],
+  [
+   "training/images/39_training.tif",
+   "training/1st_manual/39_manual1.gif",
+   "training/mask/39_training_mask.gif"
+  ],
+  [
+   "training/images/40_training.tif",
+   "training/1st_manual/40_manual1.gif",
+   "training/mask/40_training_mask.gif"
+  ]
+ ],
+ "test": [
+  [
+   "test/images/01_test.tif",
+   "test/1st_manual/01_manual1.gif",
+   "test/mask/01_test_mask.gif"
+  ],
+  [
+   "test/images/02_test.tif",
+   "test/1st_manual/02_manual1.gif",
+   "test/mask/02_test_mask.gif"
+  ],
+  [
+   "test/images/03_test.tif",
+   "test/1st_manual/03_manual1.gif",
+   "test/mask/03_test_mask.gif"
+  ],
+  [
+   "test/images/04_test.tif",
+   "test/1st_manual/04_manual1.gif",
+   "test/mask/04_test_mask.gif"
+  ],
+  [
+   "test/images/05_test.tif",
+   "test/1st_manual/05_manual1.gif",
+   "test/mask/05_test_mask.gif"
+  ],
+  [
+   "test/images/06_test.tif",
+   "test/1st_manual/06_manual1.gif",
+   "test/mask/06_test_mask.gif"
+  ],
+  [
+   "test/images/07_test.tif",
+   "test/1st_manual/07_manual1.gif",
+   "test/mask/07_test_mask.gif"
+  ],
+  [
+   "test/images/08_test.tif",
+   "test/1st_manual/08_manual1.gif",
+   "test/mask/08_test_mask.gif"
+  ],
+  [
+   "test/images/09_test.tif",
+   "test/1st_manual/09_manual1.gif",
+   "test/mask/09_test_mask.gif"
+  ],
+  [
+   "test/images/10_test.tif",
+   "test/1st_manual/10_manual1.gif",
+   "test/mask/10_test_mask.gif"
+  ],
+  [
+   "test/images/11_test.tif",
+   "test/1st_manual/11_manual1.gif",
+   "test/mask/11_test_mask.gif"
+  ],
+  [
+   "test/images/12_test.tif",
+   "test/1st_manual/12_manual1.gif",
+   "test/mask/12_test_mask.gif"
+  ],
+  [
+   "test/images/13_test.tif",
+   "test/1st_manual/13_manual1.gif",
+   "test/mask/13_test_mask.gif"
+  ],
+  [
+   "test/images/14_test.tif",
+   "test/1st_manual/14_manual1.gif",
+   "test/mask/14_test_mask.gif"
+  ],
+  [
+   "test/images/15_test.tif",
+   "test/1st_manual/15_manual1.gif",
+   "test/mask/15_test_mask.gif"
+  ],
+  [
+   "test/images/16_test.tif",
+   "test/1st_manual/16_manual1.gif",
+   "test/mask/16_test_mask.gif"
+  ],
+  [
+   "test/images/17_test.tif",
+   "test/1st_manual/17_manual1.gif",
+   "test/mask/17_test_mask.gif"
+  ],
+  [
+   "test/images/18_test.tif",
+   "test/1st_manual/18_manual1.gif",
+   "test/mask/18_test_mask.gif"
+  ],
+  [
+   "test/images/19_test.tif",
+   "test/1st_manual/19_manual1.gif",
+   "test/mask/19_test_mask.gif"
+  ],
+  [
+   "test/images/20_test.tif",
+   "test/1st_manual/20_manual1.gif",
+   "test/mask/20_test_mask.gif"
+  ]
+ ]
+}
diff --git a/src/mednet/libs/segmentation/config/data/avdrive/default.py b/src/mednet/libs/segmentation/config/data/avdrive/default.py
new file mode 100644
index 00000000..c74d2c48
--- /dev/null
+++ b/src/mednet/libs/segmentation/config/data/avdrive/default.py
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: Copyright © 2024 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+"""AV_DRIVE dataset for Vessel Segmentation (default protocol).
+
+* Split reference: [DRIVE-2004]_
+* This configuration resolution: 544 x 544 (center-crop)
+* The DRIVE dataset is used here for artery/vein segmentation
+"""
+
+from mednet.libs.segmentation.config.data.avdrive.datamodule import DataModule
+
+datamodule = DataModule(split_filename="default.json")
diff --git a/src/mednet/libs/segmentation/config/data/avdrive/second-annotator.json b/src/mednet/libs/segmentation/config/data/avdrive/second-annotator.json
new file mode 100644
index 00000000..fee520de
--- /dev/null
+++ b/src/mednet/libs/segmentation/config/data/avdrive/second-annotator.json
@@ -0,0 +1,104 @@
+{
+ "test": [
+  [
+   "test/images/01_test.tif",
+   "test/2nd_manual/01_manual2.gif",
+   "test/mask/01_test_mask.gif"
+  ],
+  [
+   "test/images/02_test.tif",
+   "test/2nd_manual/02_manual2.gif",
+   "test/mask/02_test_mask.gif"
+  ],
+  [
+   "test/images/03_test.tif",
+   "test/2nd_manual/03_manual2.gif",
+   "test/mask/03_test_mask.gif"
+  ],
+  [
+   "test/images/04_test.tif",
+   "test/2nd_manual/04_manual2.gif",
+   "test/mask/04_test_mask.gif"
+  ],
+  [
+   "test/images/05_test.tif",
+   "test/2nd_manual/05_manual2.gif",
+   "test/mask/05_test_mask.gif"
+  ],
+  [
+   "test/images/06_test.tif",
+   "test/2nd_manual/06_manual2.gif",
+   "test/mask/06_test_mask.gif"
+  ],
+  [
+   "test/images/07_test.tif",
+   "test/2nd_manual/07_manual2.gif",
+   "test/mask/07_test_mask.gif"
+  ],
+  [
+   "test/images/08_test.tif",
+   "test/2nd_manual/08_manual2.gif",
+   "test/mask/08_test_mask.gif"
+  ],
+  [
+   "test/images/09_test.tif",
+   "test/2nd_manual/09_manual2.gif",
+   "test/mask/09_test_mask.gif"
+  ],
+  [
+   "test/images/10_test.tif",
+   "test/2nd_manual/10_manual2.gif",
+   "test/mask/10_test_mask.gif"
+  ],
+  [
+   "test/images/11_test.tif",
+   "test/2nd_manual/11_manual2.gif",
+   "test/mask/11_test_mask.gif"
+  ],
+  [
+   "test/images/12_test.tif",
+   "test/2nd_manual/12_manual2.gif",
+   "test/mask/12_test_mask.gif"
+  ],
+  [
+   "test/images/13_test.tif",
+   "test/2nd_manual/13_manual2.gif",
+   "test/mask/13_test_mask.gif"
+  ],
+  [
+   "test/images/14_test.tif",
+   "test/2nd_manual/14_manual2.gif",
+   "test/mask/14_test_mask.gif"
+  ],
+  [
+   "test/images/15_test.tif",
+   "test/2nd_manual/15_manual2.gif",
+   "test/mask/15_test_mask.gif"
+  ],
+  [
+   "test/images/16_test.tif",
+   "test/2nd_manual/16_manual2.gif",
+   "test/mask/16_test_mask.gif"
+  ],
+  [
+   "test/images/17_test.tif",
+   "test/2nd_manual/17_manual2.gif",
+   "test/mask/17_test_mask.gif"
+  ],
+  [
+   "test/images/18_test.tif",
+   "test/2nd_manual/18_manual2.gif",
+   "test/mask/18_test_mask.gif"
+  ],
+  [
+   "test/images/19_test.tif",
+   "test/2nd_manual/19_manual2.gif",
+   "test/mask/19_test_mask.gif"
+  ],
+  [
+   "test/images/20_test.tif",
+   "test/2nd_manual/20_manual2.gif",
+   "test/mask/20_test_mask.gif"
+  ]
+ ]
+}
-- 
GitLab