diff --git a/bob/ip/binseg/data/dataset.py b/bob/ip/binseg/data/dataset.py index 3d7bb96e63dc2df921d8b56d0817d306d91ddff0..92599aa8986d1838b3e8b32540b524476ac77761 100644 --- a/bob/ip/binseg/data/dataset.py +++ b/bob/ip/binseg/data/dataset.py @@ -87,9 +87,19 @@ class JSONDataset: self.loader = loader self.keymaker = keymaker - def check(self): + def check(self, limit=0): """For each protocol, check if all data can be correctly accessed + + Parameters + ---------- + + limit : int + Maximum number of samples to check (in each protocol/subset + combination) in this dataset. If set to zero, then check + everything. + + Returns ------- @@ -104,6 +114,9 @@ class JSONDataset: logger.info(f"Checking protocol '{proto}'...") for name, samples in self.subsets(proto).items(): logger.info(f"Checking subset '{name}'...") + if limit: + logger.info(f"Checking at most first '{limit}' samples...") + samples = samples[:limit] for sample in samples: try: sample.data # triggers loading @@ -230,9 +243,19 @@ class CSVDataset: self.loader = loader self.keymaker = keymaker - def check(self): + def check(self, limit=0): """For each subset, check if all data can be correctly accessed + + Parameters + ---------- + + limit : int + Maximum number of samples to check (in each subset) in + this dataset. If set to zero, then check + everything.
+ + Returns ------- @@ -245,7 +268,11 @@ class CSVDataset: errors = 0 for name in self._subsets.keys(): logger.info(f"Checking subset '{name}'...") - for sample in self.samples(name): + samples = self.samples(name) + if limit: + logger.info(f"Checking at most first '{limit}' samples...") + samples = samples[:limit] + for sample in samples: try: sample.data # triggers loading logger.info(f"{sample.key}: OK") diff --git a/bob/ip/binseg/script/dataset.py b/bob/ip/binseg/script/dataset.py index c396aa47f7c611a65acf465ba8b21d1f31832e5d..c2eb52db9cf95ad92423e63713a5922680c972ba 100644 --- a/bob/ip/binseg/script/dataset.py +++ b/bob/ip/binseg/script/dataset.py @@ -104,8 +104,17 @@ def list(**kwargs): 'dataset', nargs=-1, ) +@click.option( + "--limit", + "-l", + help="Limit check to the first N samples in each dataset, making the " + "check sensibly faster. Set it to zero to check everything.", + required=True, + type=click.IntRange(0), + default=0, +) @verbosity_option() -def check(dataset, **kwargs): +def check(dataset, limit, **kwargs): """Checks file access on one or more datasets""" to_check = _get_installed_datasets() @@ -123,6 +132,6 @@ def check(dataset, **kwargs): click.echo(f"Checking \"{k.group('name')}\" dataset...") module = importlib.import_module(f"...data.{k.group('name')}", __name__) - errors += module.dataset.check() + errors += module.dataset.check(limit) if not errors: click.echo(f"No errors reported")