Skip to content
Snippets Groups Projects
Commit cca26728 authored by André Anjos
Browse files

[data.dataset] Allow limit on dataset checks

parent ccd3bbb7
No related branches found
No related tags found
1 merge request: !12 "Streamlining"
......@@ -87,9 +87,19 @@ class JSONDataset:
self.loader = loader
self.keymaker = keymaker
def check(self):
def check(self, limit=0):
"""For each protocol, check if all data can be correctly accessed
Parameters
----------
limit : int
Maximum number of samples to check (in each protocol/subset
combination) in this dataset. If set to zero, then check
everything.
Returns
-------
......@@ -104,6 +114,9 @@ class JSONDataset:
logger.info(f"Checking protocol '{proto}'...")
for name, samples in self.subsets(proto).items():
logger.info(f"Checking subset '{name}'...")
if limit:
logger.info(f"Checking at most first '{limit}' samples...")
samples = samples[:limit]
for sample in samples:
try:
sample.data # triggers loading
......@@ -230,9 +243,19 @@ class CSVDataset:
self.loader = loader
self.keymaker = keymaker
def check(self):
def check(self, limit=0):
"""For each subset, check if all data can be correctly accessed
Parameters
----------
limit : int
Maximum number of samples to check (in each protocol/subset
combination) in this dataset. If set to zero, then check
everything.
Returns
-------
......@@ -245,7 +268,11 @@ class CSVDataset:
errors = 0
for name in self._subsets.keys():
logger.info(f"Checking subset '{name}'...")
for sample in self.samples(name):
samples = self.samples(name)
if limit:
logger.info(f"Checking at most first '{limit}' samples...")
samples = samples[:limit]
for sample in samples:
try:
sample.data # triggers loading
logger.info(f"{sample.key}: OK")
......
......@@ -104,8 +104,17 @@ def list(**kwargs):
'dataset',
nargs=-1,
)
@click.option(
"--limit",
"-l",
help="Limit check to the first N samples in each dataset, making the "
"check sensibly faster. Set it to zero to check everything.",
required=True,
type=click.IntRange(0),
default=0,
)
@verbosity_option()
def check(dataset, **kwargs):
def check(dataset, limit, **kwargs):
"""Checks file access on one or more datasets"""
to_check = _get_installed_datasets()
......@@ -123,6 +132,6 @@ def check(dataset, **kwargs):
click.echo(f"Checking \"{k.group('name')}\" dataset...")
module = importlib.import_module(f"...data.{k.group('name')}",
__name__)
errors += module.dataset.check()
errors += module.dataset.check(limit)
if not errors:
click.echo(f"No errors reported")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment