From a669cc5a968d3b356d04a4066be4f221a59f7ceb Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Thu, 30 Apr 2020 17:52:35 +0200
Subject: [PATCH] [data.dataset] Fix and test check() method implementation
 after changes

---
 bob/ip/binseg/data/dataset.py                 | 27 +++++++++----------
 .../test/{test_csv.py => test_dataset.py}     |  2 ++
 2 files changed, 15 insertions(+), 14 deletions(-)
 rename bob/ip/binseg/test/{test_csv.py => test_dataset.py} (98%)

diff --git a/bob/ip/binseg/data/dataset.py b/bob/ip/binseg/data/dataset.py
index 7756b01d..dd6cb140 100644
--- a/bob/ip/binseg/data/dataset.py
+++ b/bob/ip/binseg/data/dataset.py
@@ -116,14 +116,12 @@ class JSONDataset:
                     samples = samples[:limit]
                 for pos, sample in enumerate(samples):
                     try:
-                        assert len(sample) == len(self.fieldnames), (
-                            f"Entry {pos} in subset {name} of protocol "
-                            f"{proto} has {len(sample)} entries instead of "
-                            f"{len(self.fieldnames)} (expected). Fix file "
-                            f"'{self._protocols[proto]}'"
-                        )
-                        sample.data  # check data can be loaded
+                        sample.data  # may trigger data loading
                         logger.info(f"{sample.key}: OK")
                     except Exception as e:
-                        logger.error(f"{sample.key}: {e}")
+                        logger.error(
+                            f"Found error loading entry {pos} in subset {name} "
+                            f"of protocol {proto} from file "
+                            f"'{self._protocols[proto]}': {e}"
+                            )
                         errors += 1
@@ -257,15 +258,13 @@ class CSVDataset:
                 samples = samples[:limit]
             for pos, sample in enumerate(samples):
                 try:
-                    assert len(sample) == len(self.fieldnames), (
-                        f"Entry {pos} in subset {name} has {len(sample)} "
-                        f"entries instead of {len(self.fieldnames)} "
-                        f"(expected). Fix file '{self._subsets[name]}'"
-                    )
-                    sample.data  # triggers loading
+                    sample.data  # may trigger data loading
                     logger.info(f"{sample.key}: OK")
                 except Exception as e:
-                    logger.error(f"{sample.key}: {e}")
+                    logger.error(
+                        f"Found error loading entry {pos} in subset {name} "
+                        f"from file '{self._subsets[name]}': {e}"
+                        )
                     errors += 1
         return errors
 
diff --git a/bob/ip/binseg/test/test_csv.py b/bob/ip/binseg/test/test_dataset.py
similarity index 98%
rename from bob/ip/binseg/test/test_csv.py
rename to bob/ip/binseg/test/test_dataset.py
index 1d8e22fc..f06d1a4a 100644
--- a/bob/ip/binseg/test/test_csv.py
+++ b/bob/ip/binseg/test/test_dataset.py
@@ -45,6 +45,7 @@ def test_csv_loading():
             )
 
     dataset = CSVDataset(subsets, fieldnames, _raw_data_loader)
+    dataset.check()
 
     data = dataset.subsets()
 
@@ -78,6 +79,7 @@ def test_json_loading():
             )
 
     dataset = JSONDataset(protocols, fieldnames, _raw_data_loader)
+    dataset.check()
 
     data = dataset.subsets("default")
 
-- 
GitLab