diff --git a/tests/test_11k.py b/tests/test_11k.py
index 9f795f5f6fb4d858eb4300289b505f609db1170e..c8d7405c63308f3e0b04b6e8145740143a3a6b8a 100644
--- a/tests/test_11k.py
+++ b/tests/test_11k.py
@@ -69,6 +69,75 @@ def test_protocol_consistency():
         for s in subset["test"]:
             assert s.label in [0.0, 1.0]
 
+def test_protocol_consistency_bbox():
+    from ptbench.data.tbx11k_simplified import dataset_with_bboxes
+
+    # Default protocol: must expose exactly the train/validation/test subsets
+    subset = dataset_with_bboxes.subsets("default")
+    assert len(subset) == 3
+
+    assert "train" in subset
+    assert len(subset["train"]) == 2767
+    for s in subset["train"]:
+        assert s.key.startswith("images/")
+
+    assert "validation" in subset
+    assert len(subset["validation"]) == 706
+    for s in subset["validation"]:
+        assert s.key.startswith("images/")
+
+    assert "test" in subset
+    assert len(subset["test"]) == 957
+    for s in subset["test"]:
+        assert s.key.startswith("images/")
+
+    # Every sample label in every subset must be binary (0.0 or 1.0)
+    for s in subset["train"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["validation"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["test"]:
+        assert s.label in [0.0, 1.0]
+
+    # Train only: bboxes is "none", or indexable with a first item starting with "{'xmin':"
+    for s in subset["train"]:
+        assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':")
+
+    # Cross-validation protocols "fold_0" to "fold_9": same checks, same sizes for each fold
+    for f in range(10):
+        subset = dataset_with_bboxes.subsets("fold_" + str(f))
+        assert len(subset) == 3
+
+        assert "train" in subset
+        assert len(subset["train"]) == 3177
+        for s in subset["train"]:
+            assert s.key.startswith("images/")
+
+        assert "validation" in subset
+        assert len(subset["validation"]) == 810
+        for s in subset["validation"]:
+            assert s.key.startswith("images/")
+
+        assert "test" in subset
+        assert len(subset["test"]) == 443
+        for s in subset["test"]:
+            assert s.key.startswith("images/")
+
+        # Every sample label in every subset must be binary (0.0 or 1.0)
+        for s in subset["train"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["validation"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["test"]:
+            assert s.label in [0.0, 1.0]
+
+        # Train only: bboxes is "none", or indexable with a first item starting with "{'xmin':"
+        for s in subset["train"]:
+            assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':")
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified")
 def test_loading():
@@ -80,10 +149,9 @@ def test_loading():
         assert len(data) == 2
 
         assert "data" in data
-        assert data["data"].size in (
-            (512, 512)
-        )
-        assert data["data"].mode == "RGB"  # Check colors
+        assert data["data"].size == (512, 512)
+
+        assert data["data"].mode == "L"  # Check colors
 
         assert "label" in data
         assert data["label"] in [0, 1]  # Check labels
@@ -94,9 +162,40 @@ def test_loading():
     for s in subset["train"][:limit]:
         _check_sample(s)
 
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified")
+def test_loading_bbox():
+    from ptbench.data.tbx11k_simplified import dataset_with_bboxes
+
+    def _check_sample(s):
+        data = s.data
+        assert isinstance(data, dict)
+        assert len(data) == 3  # exactly the "data", "label" and "bboxes" entries
+
+        assert "data" in data
+        assert data["data"].size == (512, 512)
+
+        assert data["data"].mode == "L"  # Check colors: presumably PIL single-channel grayscale - confirm
+
+        assert "label" in data
+        assert data["label"] in [0, 1]  # Label must be binary
+
+        assert "bboxes" in data
+        assert data["bboxes"] == "none" or data["bboxes"][0].startswith("{'xmin':")
+
+    limit = 30  # slice bound: only the first 30 train samples are loaded; use None for all
+
+    subset = dataset_with_bboxes.subsets("default")
+    for s in subset["train"][:limit]:
+        _check_sample(s)
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified")
 def test_check():
     from ptbench.data.tbx11k_simplified import dataset
 
     assert dataset.check() == 0
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified")
+def test_check_bbox():
+    from ptbench.data.tbx11k_simplified import dataset_with_bboxes
+
+    assert dataset_with_bboxes.check() == 0  # NOTE(review): presumably check() returns an error count - confirm
diff --git a/tests/test_11k_v2.py b/tests/test_11k_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..cef3df72c5f91c9f3767f50d13382903049aa56e
--- /dev/null
+++ b/tests/test_11k_v2.py
@@ -0,0 +1,201 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Tests for the TBX11K simplified dataset (v2)."""
+
+import pytest
+
+
+def test_protocol_consistency():
+    from ptbench.data.tbx11k_simplified_v2 import dataset
+
+    # Default protocol: must expose exactly the train/validation/test subsets
+    subset = dataset.subsets("default")
+    assert len(subset) == 3
+
+    assert "train" in subset
+    assert len(subset["train"]) == 2767
+    for s in subset["train"]:
+        assert s.key.startswith("images/")
+
+    assert "validation" in subset
+    assert len(subset["validation"]) == 706
+    for s in subset["validation"]:
+        assert s.key.startswith("images/")
+
+    assert "test" in subset
+    assert len(subset["test"]) == 957
+    for s in subset["test"]:
+        assert s.key.startswith("images/")
+
+    # Every sample label in every subset must be binary (0.0 or 1.0)
+    for s in subset["train"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["validation"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["test"]:
+        assert s.label in [0.0, 1.0]
+
+    # Cross-validation protocols "fold_0" to "fold_9": same checks, same sizes for each fold
+    for f in range(10):
+        subset = dataset.subsets("fold_" + str(f))
+        assert len(subset) == 3
+
+        assert "train" in subset
+        assert len(subset["train"]) == 3177
+        for s in subset["train"]:
+            assert s.key.startswith("images/")
+
+        assert "validation" in subset
+        assert len(subset["validation"]) == 810
+        for s in subset["validation"]:
+            assert s.key.startswith("images/")
+
+        assert "test" in subset
+        assert len(subset["test"]) == 443
+        for s in subset["test"]:
+            assert s.key.startswith("images/")
+
+        # Every sample label in every subset must be binary (0.0 or 1.0)
+        for s in subset["train"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["validation"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["test"]:
+            assert s.label in [0.0, 1.0]
+
+def test_protocol_consistency_bbox():
+    from ptbench.data.tbx11k_simplified_v2 import dataset_with_bboxes
+
+    # Default protocol: must expose exactly the train/validation/test subsets
+    subset = dataset_with_bboxes.subsets("default")
+    assert len(subset) == 3
+
+    assert "train" in subset
+    assert len(subset["train"]) == 2767
+    for s in subset["train"]:
+        assert s.key.startswith("images/")
+
+    assert "validation" in subset
+    assert len(subset["validation"]) == 706
+    for s in subset["validation"]:
+        assert s.key.startswith("images/")
+
+    assert "test" in subset
+    assert len(subset["test"]) == 957
+    for s in subset["test"]:
+        assert s.key.startswith("images/")
+
+    # Every sample label in every subset must be binary (0.0 or 1.0)
+    for s in subset["train"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["validation"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["test"]:
+        assert s.label in [0.0, 1.0]
+
+    # Train only: bboxes is "none", or indexable with a first item starting with "{'xmin':"
+    for s in subset["train"]:
+        assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':")
+
+    # Cross-validation protocols "fold_0" to "fold_9": same checks, same sizes for each fold
+    for f in range(10):
+        subset = dataset_with_bboxes.subsets("fold_" + str(f))
+        assert len(subset) == 3
+
+        assert "train" in subset
+        assert len(subset["train"]) == 3177
+        for s in subset["train"]:
+            assert s.key.startswith("images/")
+
+        assert "validation" in subset
+        assert len(subset["validation"]) == 810
+        for s in subset["validation"]:
+            assert s.key.startswith("images/")
+
+        assert "test" in subset
+        assert len(subset["test"]) == 443
+        for s in subset["test"]:
+            assert s.key.startswith("images/")
+
+        # Every sample label in every subset must be binary (0.0 or 1.0)
+        for s in subset["train"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["validation"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["test"]:
+            assert s.label in [0.0, 1.0]
+
+        # Train only: bboxes is "none", or indexable with a first item starting with "{'xmin':"
+        for s in subset["train"]:
+            assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':")
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v2")
+def test_loading():
+    from ptbench.data.tbx11k_simplified_v2 import dataset
+
+    def _check_sample(s):
+        data = s.data
+        assert isinstance(data, dict)
+        assert len(data) == 2  # exactly the "data" and "label" entries
+
+        assert "data" in data
+        assert data["data"].size == (512, 512)
+
+        assert data["data"].mode == "L"  # Check colors: presumably PIL single-channel grayscale - confirm
+
+        assert "label" in data
+        assert data["label"] in [0, 1]  # Label must be binary
+
+    limit = 30  # slice bound: only the first 30 train samples are loaded; use None for all
+
+    subset = dataset.subsets("default")
+    for s in subset["train"][:limit]:
+        _check_sample(s)
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v2")
+def test_loading_bbox():
+    from ptbench.data.tbx11k_simplified_v2 import dataset_with_bboxes
+
+    def _check_sample(s):
+        data = s.data
+        assert isinstance(data, dict)
+        assert len(data) == 3  # exactly the "data", "label" and "bboxes" entries
+
+        assert "data" in data
+        assert data["data"].size == (512, 512)
+
+        assert data["data"].mode == "L"  # Check colors: presumably PIL single-channel grayscale - confirm
+
+        assert "label" in data
+        assert data["label"] in [0, 1]  # Label must be binary
+
+        assert "bboxes" in data
+        assert data["bboxes"] == "none" or data["bboxes"][0].startswith("{'xmin':")
+
+    limit = 30  # slice bound: only the first 30 train samples are loaded; use None for all
+
+    subset = dataset_with_bboxes.subsets("default")
+    for s in subset["train"][:limit]:
+        _check_sample(s)
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v2")
+def test_check():
+    from ptbench.data.tbx11k_simplified_v2 import dataset
+
+    assert dataset.check() == 0  # NOTE(review): presumably check() returns an error count - confirm
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v2")
+def test_check_bbox():
+    from ptbench.data.tbx11k_simplified_v2 import dataset_with_bboxes
+
+    assert dataset_with_bboxes.check() == 0  # NOTE(review): presumably check() returns an error count - confirm
diff --git a/tests/test_11k_v3.py b/tests/test_11k_v3.py
new file mode 100644
index 0000000000000000000000000000000000000000..f659c4082a55f7eec66bd5dfd336fa1f4c6d5bed
--- /dev/null
+++ b/tests/test_11k_v3.py
@@ -0,0 +1,201 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Tests for the TBX11K simplified dataset (v3)."""
+
+import pytest
+
+
+def test_protocol_consistency():
+    from ptbench.data.tbx11k_simplified_v3 import dataset
+
+    # Default protocol: must expose exactly the train/validation/test subsets
+    subset = dataset.subsets("default")
+    assert len(subset) == 3
+
+    assert "train" in subset
+    assert len(subset["train"]) == 2767
+    for s in subset["train"]:
+        assert s.key.startswith("images/")
+
+    assert "validation" in subset
+    assert len(subset["validation"]) == 706
+    for s in subset["validation"]:
+        assert s.key.startswith("images/")
+
+    assert "test" in subset
+    assert len(subset["test"]) == 957
+    for s in subset["test"]:
+        assert s.key.startswith("images/")
+
+    # Every sample label in every subset must be binary (0.0 or 1.0)
+    for s in subset["train"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["validation"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["test"]:
+        assert s.label in [0.0, 1.0]
+
+    # Cross-validation protocols "fold_0" to "fold_9": same checks, same sizes for each fold
+    for f in range(10):
+        subset = dataset.subsets("fold_" + str(f))
+        assert len(subset) == 3
+
+        assert "train" in subset
+        assert len(subset["train"]) == 3177
+        for s in subset["train"]:
+            assert s.key.startswith("images/")
+
+        assert "validation" in subset
+        assert len(subset["validation"]) == 810
+        for s in subset["validation"]:
+            assert s.key.startswith("images/")
+
+        assert "test" in subset
+        assert len(subset["test"]) == 443
+        for s in subset["test"]:
+            assert s.key.startswith("images/")
+
+        # Every sample label in every subset must be binary (0.0 or 1.0)
+        for s in subset["train"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["validation"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["test"]:
+            assert s.label in [0.0, 1.0]
+
+def test_protocol_consistency_bbox():
+    from ptbench.data.tbx11k_simplified_v3 import dataset_with_bboxes
+
+    # Default protocol: must expose exactly the train/validation/test subsets
+    subset = dataset_with_bboxes.subsets("default")
+    assert len(subset) == 3
+
+    assert "train" in subset
+    assert len(subset["train"]) == 2767
+    for s in subset["train"]:
+        assert s.key.startswith("images/")
+
+    assert "validation" in subset
+    assert len(subset["validation"]) == 706
+    for s in subset["validation"]:
+        assert s.key.startswith("images/")
+
+    assert "test" in subset
+    assert len(subset["test"]) == 957
+    for s in subset["test"]:
+        assert s.key.startswith("images/")
+
+    # Every sample label in every subset must be binary (0.0 or 1.0)
+    for s in subset["train"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["validation"]:
+        assert s.label in [0.0, 1.0]
+
+    for s in subset["test"]:
+        assert s.label in [0.0, 1.0]
+
+    # Train only: bboxes is "none", or indexable with a first item starting with "{'xmin':"
+    for s in subset["train"]:
+        assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':")
+
+    # Cross-validation protocols "fold_0" to "fold_9": same checks, same sizes for each fold
+    for f in range(10):
+        subset = dataset_with_bboxes.subsets("fold_" + str(f))
+        assert len(subset) == 3
+
+        assert "train" in subset
+        assert len(subset["train"]) == 3177
+        for s in subset["train"]:
+            assert s.key.startswith("images/")
+
+        assert "validation" in subset
+        assert len(subset["validation"]) == 810
+        for s in subset["validation"]:
+            assert s.key.startswith("images/")
+
+        assert "test" in subset
+        assert len(subset["test"]) == 443
+        for s in subset["test"]:
+            assert s.key.startswith("images/")
+
+        # Every sample label in every subset must be binary (0.0 or 1.0)
+        for s in subset["train"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["validation"]:
+            assert s.label in [0.0, 1.0]
+
+        for s in subset["test"]:
+            assert s.label in [0.0, 1.0]
+
+        # Train only: bboxes is "none", or indexable with a first item starting with "{'xmin':"
+        for s in subset["train"]:
+            assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':")
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3")
+def test_loading():
+    from ptbench.data.tbx11k_simplified_v3 import dataset
+
+    def _check_sample(s):
+        data = s.data
+        assert isinstance(data, dict)
+        assert len(data) == 2  # exactly the "data" and "label" entries
+
+        assert "data" in data
+        assert data["data"].size == (512, 512)
+
+        assert data["data"].mode == "L"  # Check colors: presumably PIL single-channel grayscale - confirm
+
+        assert "label" in data
+        assert data["label"] in [0, 1]  # Label must be binary
+
+    limit = 30  # slice bound: only the first 30 train samples are loaded; use None for all
+
+    subset = dataset.subsets("default")
+    for s in subset["train"][:limit]:
+        _check_sample(s)
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3")
+def test_loading_bbox():
+    from ptbench.data.tbx11k_simplified_v3 import dataset_with_bboxes
+
+    def _check_sample(s):
+        data = s.data
+        assert isinstance(data, dict)
+        assert len(data) == 3  # exactly the "data", "label" and "bboxes" entries
+
+        assert "data" in data
+        assert data["data"].size == (512, 512)
+
+        assert data["data"].mode == "L"  # Check colors: presumably PIL single-channel grayscale - confirm
+
+        assert "label" in data
+        assert data["label"] in [0, 1]  # Label must be binary
+
+        assert "bboxes" in data
+        assert data["bboxes"] == "none" or data["bboxes"][0].startswith("{'xmin':")
+
+    limit = 30  # slice bound: only the first 30 train samples are loaded; use None for all
+
+    subset = dataset_with_bboxes.subsets("default")
+    for s in subset["train"][:limit]:
+        _check_sample(s)
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3")
+def test_check():
+    from ptbench.data.tbx11k_simplified_v3 import dataset
+
+    assert dataset.check() == 0  # NOTE(review): presumably check() returns an error count - confirm
+
+@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3")
+def test_check_bbox():
+    from ptbench.data.tbx11k_simplified_v3 import dataset_with_bboxes
+
+    assert dataset_with_bboxes.check() == 0  # NOTE(review): presumably check() returns an error count - confirm