diff --git a/tests/data/lfs b/tests/data/lfs
index 05344d20182ad4169fc5b9c38052d629aded30ed..e2e4a98d675ec61dac44c339c28d91fcb180b398 160000
--- a/tests/data/lfs
+++ b/tests/data/lfs
@@ -1 +1 @@
-Subproject commit 05344d20182ad4169fc5b9c38052d629aded30ed
+Subproject commit e2e4a98d675ec61dac44c339c28d91fcb180b398
diff --git a/tests/test_hivtb.py b/tests/test_hivtb.py
index 03066c2fade1e15482a8523711e5d972d081897d..6373d0ee5cc54c5ca3d23d23f2f57efaf6693c93 100644
--- a/tests/test_hivtb.py
+++ b/tests/test_hivtb.py
@@ -7,6 +7,10 @@ import importlib
 
 import pytest
 
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
+
 
 def id_function(val):
     if isinstance(val, dict):
@@ -90,3 +94,39 @@ def test_loading(database_checkers, name: str, dataset: str):
             expected_num_labels=1,
         )
         limit -= 1
+
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.hivtb")
+def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_hivtb_fold_0.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
+    datamodule = importlib.import_module(
+        ".fold_0", "mednet.config.data.hivtb"
+    ).datamodule
+
+    datamodule.model_transforms = []
+    datamodule.setup("predict")
+
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
+
+            img = to_pil_image(image_data)
+
+            histogram = img.histogram()
+
+            assert histogram == ref_histogram
diff --git a/tests/test_indian.py b/tests/test_indian.py
index a35cfdb318ee14f299b9dc75932c0c3a715c3d40..452aa6c6790571613889d7515f51e077655d59d3 100644
--- a/tests/test_indian.py
+++ b/tests/test_indian.py
@@ -7,11 +7,12 @@ dataset A/dataset B) dataset.
 """
 
 import importlib
-import json
 
 import pytest
 
-from PIL import Image
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
 
 
 def id_function(val):
@@ -102,6 +103,11 @@ def test_loading(database_checkers, name: str, dataset: str):
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.indian")
 def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_indian_default.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
     datamodule = importlib.import_module(
         ".default", "mednet.config.data.indian"
     ).datamodule
@@ -109,17 +115,23 @@ def test_loaded_image_quality(datadir):
     datamodule.model_transforms = []
     datamodule.setup("predict")
 
-    loader = datamodule.splits["train"][0][1]
-    first_sample = datamodule.splits["train"][0][0][0]
-    image_data = loader.sample(first_sample)[0].numpy()[
-        0, :, :
-    ]  # PIL expects grayscale to not have any leading dim
-    img = Image.fromarray(image_data, mode="L")
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
 
-    histogram = img.histogram()
+            img = to_pil_image(image_data)
 
-    reference_histogram_file = str(datadir / "histogram_indian.json")
-    with open(reference_histogram_file) as i_f:
-        ref_histogram = json.load(i_f)["histogram"]
+            histogram = img.histogram()
 
-    assert histogram == ref_histogram
+            assert histogram == ref_histogram
diff --git a/tests/test_montgomery.py b/tests/test_montgomery.py
index 349898d91500833c1141e8ddfb0471f8af4e896b..91c7aa598b0090e378184ed77894f176a919893a 100644
--- a/tests/test_montgomery.py
+++ b/tests/test_montgomery.py
@@ -4,11 +4,12 @@
 """Tests for Montgomery dataset."""
 
 import importlib
-import json
 
 import pytest
 
-from PIL import Image
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
 
 
 def id_function(val):
@@ -99,6 +100,11 @@ def test_loading(database_checkers, name: str, dataset: str):
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_montgomery_default.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
     datamodule = importlib.import_module(
         ".default", "mednet.config.data.montgomery"
     ).datamodule
@@ -106,17 +112,23 @@ def test_loaded_image_quality(datadir):
     datamodule.model_transforms = []
     datamodule.setup("predict")
 
-    loader = datamodule.splits["train"][0][1]
-    first_sample = datamodule.splits["train"][0][0][0]
-    image_data = loader.sample(first_sample)[0].numpy()[
-        0, :, :
-    ]  # PIL expects grayscale to not have any leading dim
-    img = Image.fromarray(image_data, mode="L")
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
 
-    histogram = img.histogram()
+            img = to_pil_image(image_data)
 
-    reference_histogram_file = str(datadir / "histogram_montgomery.json")
-    with open(reference_histogram_file) as i_f:
-        ref_histogram = json.load(i_f)["histogram"]
+            histogram = img.histogram()
 
-    assert histogram == ref_histogram
+            assert histogram == ref_histogram
diff --git a/tests/test_nih_cxr14.py b/tests/test_nih_cxr14.py
index dc73aa2907616fec6a9d48cc7191aa0403a5568b..a2e014f3477ff7afaca92cba74d08f1d9edece3f 100644
--- a/tests/test_nih_cxr14.py
+++ b/tests/test_nih_cxr14.py
@@ -7,6 +7,10 @@ import importlib
 
 import pytest
 
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
+
 
 def id_function(val):
     if isinstance(val, dict):
@@ -44,7 +48,7 @@ testdata = [
 ]
 
 
-@pytest.mark.skip_if_rc_var_not_set("datadir.padchest")
+@pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14")
 @pytest.mark.parametrize("name,dataset,num_labels", testdata)
 def test_loading(database_checkers, name: str, dataset: str, num_labels: int):
     datamodule = importlib.import_module(
@@ -70,3 +74,39 @@ def test_loading(database_checkers, name: str, dataset: str, num_labels: int):
             expected_image_shape=(1, 1024, 1024),
         )
         limit -= 1
+
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14")
+def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_nih_cxr14_default.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
+    datamodule = importlib.import_module(
+        ".default", "mednet.config.data.nih_cxr14"
+    ).datamodule
+
+    datamodule.model_transforms = []
+    datamodule.setup("predict")
+
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
+
+            img = to_pil_image(image_data)
+
+            histogram = img.histogram()
+
+            assert histogram == ref_histogram
diff --git a/tests/test_padchest.py b/tests/test_padchest.py
index 98eaeb2d35036727eab39cd85578d652966c1b36..47494d941ee07b948245bb6ef5f34255aa81f1d5 100644
--- a/tests/test_padchest.py
+++ b/tests/test_padchest.py
@@ -7,6 +7,10 @@ import importlib
 
 import pytest
 
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
+
 
 def id_function(val):
     if isinstance(val, dict):
@@ -75,3 +79,39 @@ def test_loading(database_checkers, name: str, dataset: str, num_labels: int):
                 expected_num_labels=num_labels,
             )
             limit -= 1
+
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.padchest")
+def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_padchest_idiap.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
+    datamodule = importlib.import_module(
+        ".idiap", "mednet.config.data.padchest"
+    ).datamodule
+
+    datamodule.model_transforms = []
+    datamodule.setup("predict")
+
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
+
+            img = to_pil_image(image_data)
+
+            histogram = img.histogram()
+
+            assert histogram == ref_histogram
diff --git a/tests/test_shenzhen.py b/tests/test_shenzhen.py
index ff0a8af2a94d673079c55f095ed2340c41c9ed9e..e2283fadab0f4af91365a43e6474b8083bb2808b 100644
--- a/tests/test_shenzhen.py
+++ b/tests/test_shenzhen.py
@@ -4,11 +4,12 @@
 """Tests for Shenzhen dataset."""
 
 import importlib
-import json
 
 import pytest
 
-from PIL import Image
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
 
 
 def id_function(val):
@@ -99,6 +100,11 @@ def test_loading(database_checkers, name: str, dataset: str):
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.shenzhen")
 def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_shenzhen_default.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
     datamodule = importlib.import_module(
         ".default", "mednet.config.data.shenzhen"
     ).datamodule
@@ -106,17 +112,23 @@ def test_loaded_image_quality(datadir):
     datamodule.model_transforms = []
     datamodule.setup("predict")
 
-    loader = datamodule.splits["train"][0][1]
-    first_sample = datamodule.splits["train"][0][0][0]
-    image_data = loader.sample(first_sample)[0].numpy()[
-        0, :, :
-    ]  # PIL expects grayscale to not have any leading dim
-    img = Image.fromarray(image_data, mode="L")
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
 
-    histogram = img.histogram()
+            img = to_pil_image(image_data)
 
-    reference_histogram_file = str(datadir / "histogram_shenzhen.json")
-    with open(reference_histogram_file) as i_f:
-        ref_histogram = json.load(i_f)["histogram"]
+            histogram = img.histogram()
 
-    assert histogram == ref_histogram
+            assert histogram == ref_histogram
diff --git a/tests/test_tbpoc.py b/tests/test_tbpoc.py
index 58f762d57e6e8c822dc5b5c23c026e9dd1934a66..75eaeca79ac964bd5788ba135d02cd8d01b87673 100644
--- a/tests/test_tbpoc.py
+++ b/tests/test_tbpoc.py
@@ -7,6 +7,10 @@ import importlib
 
 import pytest
 
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
+
 
 def id_function(val):
     if isinstance(val, dict):
@@ -96,3 +100,39 @@ def test_loading(database_checkers, name: str, dataset: str):
             expected_num_labels=1,
         )
         limit -= 1
+
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbpoc")
+def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_tbpoc_fold_0.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
+    datamodule = importlib.import_module(
+        ".fold_0", "mednet.config.data.tbpoc"
+    ).datamodule
+
+    datamodule.model_transforms = []
+    datamodule.setup("predict")
+
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
+
+            img = to_pil_image(image_data)
+
+            histogram = img.histogram()
+
+            assert histogram == ref_histogram
diff --git a/tests/test_tbx11k.py b/tests/test_tbx11k.py
index 6c022dbaaf99357b8acea8843a2947fc7cebc9ce..7b2a60c9471c4a699915bcacba8deb605c0680f6 100644
--- a/tests/test_tbx11k.py
+++ b/tests/test_tbx11k.py
@@ -9,6 +9,10 @@ import typing
 import pytest
 import torch
 
+from torchvision.transforms.functional import to_pil_image
+
+from mednet.data.split import JSONDatabaseSplit
+
 
 def id_function(val):
     if isinstance(val, (dict, tuple)):
@@ -283,3 +287,39 @@ def test_loading(name: str, dataset: str, prefixes: typing.Sequence[str]):
             expected_image_shape=(3, 512, 512),
         )
         limit -= 1
+
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k")
+def test_loaded_image_quality(datadir):
+    reference_histogram_file = str(
+        datadir / "lfs/histograms/raw_data/histograms_tbx11k_v1_fold_0.json"
+    )
+    ref_histogram_splits = JSONDatabaseSplit(reference_histogram_file)
+
+    datamodule = importlib.import_module(
+        ".v1_fold_0", "mednet.config.data.tbx11k"
+    ).datamodule
+
+    datamodule.model_transforms = []
+    datamodule.setup("predict")
+
+    for split_name in ref_histogram_splits:
+        datamodule_split = datamodule.splits[split_name]
+
+        loader = datamodule_split[0][1]
+
+        for ref_data in ref_histogram_splits[split_name]:
+            sample_path = ref_data[0]
+            ref_histogram = ref_data[1]
+
+            test_sample = (
+                sample_path,
+                -1,
+            )  # Need to specify a label even if not used.
+            image_data = loader.sample(test_sample)[0]
+
+            img = to_pil_image(image_data)
+
+            histogram = img.histogram()
+
+            assert histogram == ref_histogram