From cca26728250f0a424dde2fcc0c670d5a4e810423 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Tue, 21 Apr 2020 09:40:12 +0200 Subject: [PATCH] [data.dataset] Allow limit on dataset checks --- bob/ip/binseg/data/dataset.py | 33 ++++++++++++++++++++++++++++++--- bob/ip/binseg/script/dataset.py | 13 +++++++++++-- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/bob/ip/binseg/data/dataset.py b/bob/ip/binseg/data/dataset.py index 3d7bb96e..92599aa8 100644 --- a/bob/ip/binseg/data/dataset.py +++ b/bob/ip/binseg/data/dataset.py @@ -87,9 +87,19 @@ class JSONDataset: self.loader = loader self.keymaker = keymaker - def check(self): + def check(self, limit=0): """For each protocol, check if all data can be correctly accessed + + Parameters + ---------- + + limit : int + Maximum number of samples to check (in each protocol/subset + combination) in this dataset. If set to zero, then check + everything. + + Returns ------- @@ -104,6 +114,9 @@ class JSONDataset: logger.info(f"Checking protocol '{proto}'...") for name, samples in self.subsets(proto).items(): logger.info(f"Checking subset '{name}'...") + if limit: + logger.info(f"Checking at most first '{limit}' samples...") + samples = samples[:limit] for sample in samples: try: sample.data # triggers loading @@ -230,9 +243,19 @@ class CSVDataset: self.loader = loader self.keymaker = keymaker - def check(self): + def check(self, limit=0): """For each subset, check if all data can be correctly accessed + + Parameters + ---------- + + limit : int + Maximum number of samples to check (in each protocol/subset + combination) in this dataset. If set to zero, then check + everything. + + Returns ------- @@ -245,7 +268,11 @@ class CSVDataset: errors = 0 for name in self._subsets.keys(): logger.info(f"Checking subset '{name}'...") - for sample in self.samples(name): + samples = self.samples(name) + if limit: + logger.info(f"Checking at most first '{limit}' samples...") + samples = samples[:limit] + for sample in samples: try: sample.data # triggers loading logger.info(f"{sample.key}: OK") diff --git a/bob/ip/binseg/script/dataset.py b/bob/ip/binseg/script/dataset.py index c396aa47..c2eb52db 100644 --- a/bob/ip/binseg/script/dataset.py +++ b/bob/ip/binseg/script/dataset.py @@ -104,8 +104,17 @@ def list(**kwargs): 'dataset', nargs=-1, ) +@click.option( + "--limit", + "-l", + help="Limit check to the first N samples in each dataset, making the " + "check sensibly faster. Set it to zero to check everything.", + required=True, + type=click.IntRange(0), + default=0, +) @verbosity_option() -def check(dataset, **kwargs): +def check(dataset, limit, **kwargs): """Checks file access on one or more datasets""" to_check = _get_installed_datasets() @@ -123,6 +132,6 @@ def check(dataset, **kwargs): click.echo(f"Checking \"{k.group('name')}\" dataset...") module = importlib.import_module(f"...data.{k.group('name')}", __name__) - errors += module.dataset.check() + errors += module.dataset.check(limit) if not errors: click.echo(f"No errors reported") -- GitLab