# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later from __future__ import annotations import importlib.metadata import importlib.resources import click from clapper.click import AliasedGroup, verbosity_option from clapper.logging import setup from ..data.split import check_database_split_loading logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") def _get_installed_protocols() -> dict[str, str]: """Returns a list of all installed protocols. Returns ------- protocols: List of protocols. """ entrypoints = sorted( [ entrypoint for entrypoint in importlib.metadata.entry_points( group="ptbench.config" ).names ] ) protocols = [ importlib.metadata.entry_points(group="ptbench.config")[ entrypoint ].module for entrypoint in entrypoints ] protocols_dict = { entrypoints[i]: protocols[i] for i in range(len(entrypoints)) } return protocols_dict @click.group(cls=AliasedGroup) def datamodule() -> None: """Commands for listing and verifying datamodules.""" pass @datamodule.command( epilog="""Examples: \b 1. To install a dataset, set up its data directory ("datadir"). For example, to setup access to Montgomery files you downloaded locally at the directory "/path/to/montgomery/files", edit the RC file (typically ``$HOME/.config/ptbench.toml``), and add a line like the following: .. code:: toml [datadir] montgomery = "/path/to/montgomery/files" .. note:: This setting **is** case-sensitive. \b 2. List all raw datasets supported (and configured): .. code:: sh $ ptbench dataset list """, ) @verbosity_option(logger=logger, expose_value=False) def list(): """Lists all supported and configured datasets.""" installed = _get_installed_protocols() click.echo("Available protocols:") for k, v in installed.items(): click.echo(f'- {k}: "{v}"') @datamodule.command( epilog="""Examples: 1. Check if all files from the fold_0 of the Montgomery database can be loaded: .. code:: sh ptbench datamodule check -vv montgomery_f0 2. Check if all files of multiple installed protocols can be loaded: .. code:: sh ptbench datamodule check -vv montgomery shenzhen """, ) @click.argument( "protocols", nargs=-1, ) @click.option( "--limit", "-l", help="Limit check to the first N samples in each datamodule, making the " "check sensibly faster. Set it to zero to check everything.", required=True, type=click.IntRange(0), default=0, ) @verbosity_option(logger=logger, expose_value=False) def check(protocols, limit): """Checks file access on one or more datamodules.""" import importlib errors = 0 for protocol in protocols: logger.info(f"Checking {protocol}") try: module = importlib.metadata.entry_points(group="ptbench.config")[ protocol ].module except KeyError: raise Exception(f"Could not find protocol {protocol}") datamodule = importlib.import_module(module).datamodule database_split = datamodule.database_split raw_data_loader = datamodule.raw_data_loader errors += check_database_split_loading( database_split, raw_data_loader, limit=limit ) if not errors: click.echo("No errors reported")