Commit 129a17a1 authored by Daniel CARRON, committed by André Anjos

[script] Move config and database scripts to common lib

parent 820251d5
Merge request !46: Create common library
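In essence, this change keeps only the Click command definitions in each library-specific CLI and delegates the actual work to shared helpers under ``mednet.libs.common.scripts``, parameterized by that library's entry-point group. The minimal sketch below illustrates the pattern only; the group name ``mednet.libs.example.config`` is hypothetical and not part of this commit.

# Illustration of the delegation pattern introduced by this commit (not part
# of the diff below).  The entry-point group "mednet.libs.example.config" is
# hypothetical; real libraries pass their own group, as shown in the diff.
import click
from clapper.click import AliasedGroup, verbosity_option
from clapper.logging import setup
from mednet.libs.common.scripts.config import list_ as list__

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")

@click.group(cls=AliasedGroup)
def config():
    """Command for listing, describing and copying configuration resources."""
    pass

@config.command(name="list")
@verbosity_option(logger=logger)
def list_(verbose) -> None:
    """List configuration files installed."""
    # the only library-specific piece is the entry-point group name
    list__("mednet.libs.example.config", verbose)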
@@ -2,14 +2,12 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later
import importlib.metadata
import inspect
import pathlib
import typing
import click
from clapper.click import AliasedGroup, verbosity_option
from clapper.logging import setup
from mednet.libs.common.scripts.config import copy as copy_
from mednet.libs.common.scripts.config import describe as describe_
from mednet.libs.common.scripts.config import list_ as list__
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
@@ -45,54 +43,7 @@ def config():
@verbosity_option(logger=logger)
def list_(verbose) -> None: # numpydoc ignore=PR01
"""List configuration files installed."""
entry_points = importlib.metadata.entry_points().select(
group="mednet.libs.classification.config",
)
entry_point_dict = {k.name: k for k in entry_points}
# all potential modules with configuration resources
modules = {k.module.rsplit(".", 1)[0] for k in entry_point_dict.values()}
# sort data entries by originating module
entry_points_by_module: dict[str, dict[str, typing.Any]] = {}
for k in modules:
entry_points_by_module[k] = {}
for name, ep in entry_point_dict.items():
if ep.module.rsplit(".", 1)[0] == k:
entry_points_by_module[k][name] = ep
for config_type in sorted(entry_points_by_module):
# calculates the longest config name so we offset the printing
longest_name_length = max(
len(k) for k in entry_points_by_module[config_type].keys()
)
# set-up printing options
print_string = " %%-%ds %%s" % (longest_name_length,)
# 79 - 4 spaces = 75 (see string above)
description_leftover = 75 - longest_name_length
click.echo(f"module: {config_type}")
for name in sorted(entry_points_by_module[config_type]):
ep = entry_point_dict[name]
if verbose >= 1:
module = ep.load()
doc = inspect.getdoc(module)
if doc is not None:
summary = doc.split("\n\n")[0]
else:
summary = "<DOCSTRING NOT AVAILABLE>"
else:
summary = ""
summary = (
(summary[: (description_leftover - 3)] + "...")
if len(summary) > (description_leftover - 3)
else summary
)
click.echo(print_string % (name, summary))
list__("mednet.libs.classification.config", verbose)
@config.command(
@@ -124,29 +75,7 @@ def list_(verbose) -> None: # numpydoc ignore=PR01
@verbosity_option(logger=logger)
def describe(name, verbose) -> None: # numpydoc ignore=PR01
"""Describe a specific configuration file."""
entry_points = importlib.metadata.entry_points().select(
group="mednet.libs.classification.config",
)
entry_point_dict = {k.name: k for k in entry_points}
for k in name:
if k not in entry_point_dict:
logger.error("Cannot find configuration resource '%s'", k)
continue
ep = entry_point_dict[k]
click.echo(f"Configuration: {ep.name}")
click.echo(f"Python Module: {ep.module}")
click.echo("")
mod = ep.load()
if verbose >= 1:
fname = inspect.getfile(mod)
click.echo("Contents:")
with pathlib.Path(fname).open() as f:
click.echo(f.read())
else: # only output documentation
click.echo("Documentation:")
click.echo(inspect.getdoc(mod))
describe_(name, "mednet.libs.classification.config", verbose)
@config.command(
@@ -175,19 +104,4 @@ def describe(name, verbose) -> None: # numpydoc ignore=PR01
@verbosity_option(logger=logger, expose_value=False)
def copy(source, destination) -> None: # numpydoc ignore=PR01
"""Copy a specific configuration resource so it can be modified locally."""
import shutil
entry_points = importlib.metadata.entry_points().select(
group="mednet.libs.classification.config",
)
entry_point_dict = {k.name: k for k in entry_points}
if source not in entry_point_dict:
logger.error("Cannot find configuration resource '%s'", source)
return
ep = entry_point_dict[source]
mod = ep.load()
src_name = inspect.getfile(mod)
logger.info(f"cp {src_name} -> {destination}")
shutil.copyfile(src_name, destination)
copy_(source, destination, "mednet.libs.classification.config")
@@ -5,6 +5,8 @@
import click
from clapper.click import AliasedGroup, verbosity_option
from clapper.logging import setup
from mednet.libs.common.scripts.database import check as check_
from mednet.libs.common.scripts.database import list_ as list__
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
@@ -93,18 +95,8 @@ def database() -> None:
@verbosity_option(logger=logger, expose_value=False)
def list_():
"""List all supported and configured databases."""
config = _get_raw_databases()
click.echo("Available databases:")
for k, v in config.items():
if "datadir" not in v:
# this database does not have a "datadir"
continue
if v["datadir"] is not None:
click.secho(f'- {k} ({v["module"]}): "{v["datadir"]}"', fg="green")
else:
click.echo(f'- {k} ({v["module"]}): NOT installed')
list__(_get_raw_databases())
@database.command(
@@ -135,62 +127,4 @@ def list_():
@verbosity_option(logger=logger, expose_value=False)
def check(split, limit): # numpydoc ignore=PR01
"""Check file access on one or more DataModules."""
import importlib.metadata
import sys
click.secho(f"Checking split `{split}`...", fg="yellow")
try:
module = importlib.metadata.entry_points(
group="mednet.libs.classification.config"
)[split].module
except KeyError:
raise Exception(f"Could not find database split `{split}`")
datamodule = importlib.import_module(module).datamodule
datamodule.model_transforms = [] # should be done before setup()
datamodule.batch_size = 1 # ensure one sample is loaded at a time
datamodule.setup("predict") # sets up all datasets
loaders = datamodule.predict_dataloader()
errors = 0
for k, loader in loaders.items():
if limit == 0:
click.secho(
f"Checking all samples of dataset `{k}` at split `{split}`...",
fg="yellow",
)
loader_limit = sys.maxsize
else:
click.secho(
f"Checking first {limit} samples of dataset "
f"`{k}` at split `{split}`...",
fg="yellow",
)
loader_limit = limit
# the for loop will trigger raw data loading (i.e. user code), protect
# it
try:
for i, batch in enumerate(loader):
if loader_limit == 0:
break
logger.info(
f"{batch[1]['name'][0]}: "
f"{[s for s in batch[0][0].shape]}@{batch[0][0].dtype}",
)
loader_limit -= 1
except Exception:
logger.exception(f"Unable to load batch {i} in dataset {k}")
errors += 1
if not errors:
click.secho(
f"OK! No errors were reported for database split `{split}`.",
fg="green",
)
else:
click.secho(
f"Found {errors} errors loading DataModule `{split}`.",
fg="red",
)
check_("mednet.libs.classification.config", split, limit)
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import importlib.metadata
import inspect
import pathlib
import typing
import click
from clapper.logging import setup
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
def list_(entry_point_group, verbose) -> None: # numpydoc ignore=PR01
"""List configuration files installed."""
entry_points = importlib.metadata.entry_points().select(
group=entry_point_group,
)
entry_point_dict = {k.name: k for k in entry_points}
# all potential modules with configuration resources
modules = {k.module.rsplit(".", 1)[0] for k in entry_point_dict.values()}
# sort data entries by originating module
entry_points_by_module: dict[str, dict[str, typing.Any]] = {}
for k in modules:
entry_points_by_module[k] = {}
for name, ep in entry_point_dict.items():
if ep.module.rsplit(".", 1)[0] == k:
entry_points_by_module[k][name] = ep
for config_type in sorted(entry_points_by_module):
# calculates the longest config name so we offset the printing
longest_name_length = max(
len(k) for k in entry_points_by_module[config_type].keys()
)
# set-up printing options
print_string = " %%-%ds %%s" % (longest_name_length,)
# 79 - 4 spaces = 75 (see string above)
description_leftover = 75 - longest_name_length
click.echo(f"module: {config_type}")
for name in sorted(entry_points_by_module[config_type]):
ep = entry_point_dict[name]
if verbose >= 1:
module = ep.load()
doc = inspect.getdoc(module)
if doc is not None:
summary = doc.split("\n\n")[0]
else:
summary = "<DOCSTRING NOT AVAILABLE>"
else:
summary = ""
summary = (
(summary[: (description_leftover - 3)] + "...")
if len(summary) > (description_leftover - 3)
else summary
)
click.echo(print_string % (name, summary))
def describe(name, entry_point_group, verbose) -> None: # numpydoc ignore=PR01
"""Describe a specific configuration file."""
entry_points = importlib.metadata.entry_points().select(
group=entry_point_group,
)
entry_point_dict = {k.name: k for k in entry_points}
for k in name:
if k not in entry_point_dict:
logger.error("Cannot find configuration resource '%s'", k)
continue
ep = entry_point_dict[k]
click.echo(f"Configuration: {ep.name}")
click.echo(f"Python Module: {ep.module}")
click.echo("")
mod = ep.load()
if verbose >= 1:
fname = inspect.getfile(mod)
click.echo("Contents:")
with pathlib.Path(fname).open() as f:
click.echo(f.read())
else: # only output documentation
click.echo("Documentation:")
click.echo(inspect.getdoc(mod))
def copy(
source, destination, entry_point_group
) -> None: # numpydoc ignore=PR01
"""Copy a specific configuration resource so it can be modified locally."""
import shutil
entry_points = importlib.metadata.entry_points().select(
group=entry_point_group,
)
entry_point_dict = {k.name: k for k in entry_points}
if source not in entry_point_dict:
logger.error("Cannot find configuration resource '%s'", source)
return
ep = entry_point_dict[source]
mod = ep.load()
src_name = inspect.getfile(mod)
logger.info(f"cp {src_name} -> {destination}")
shutil.copyfile(src_name, destination)
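These helpers are library-agnostic: a consumer only passes its own entry-point group. A small usage sketch follows, with a hypothetical group name and placeholder resource names (an illustration, not part of the commit).

from mednet.libs.common.scripts.config import copy as copy_
from mednet.libs.common.scripts.config import describe as describe_
from mednet.libs.common.scripts.config import list_ as list__

GROUP = "mednet.libs.example.config"  # hypothetical entry-point group

# print every configuration resource registered under GROUP
list__(GROUP, verbose=1)

# print the documentation of one or more named resources (name is an iterable)
describe_(("montgomery",), GROUP, verbose=0)

# copy a stock configuration file locally so it can be edited
copy_("montgomery", "newdataset.py", GROUP)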
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import click
from clapper.logging import setup
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
def list_(config: dict[str, dict[str, str]]) -> None:
"""List all supported and configured databases.
Parameters
----------
config
Dictionary where keys are database names, and values are dictionaries
containing two string keys:
* ``module``: the full Pythonic module name (e.g.
``mednet.libs.classification.data.montgomery``).
* ``datadir``: points to the user-configured data directory for the
current dataset, if set, or ``None`` otherwise.
"""
click.echo("Available databases:")
for k, v in config.items():
if "datadir" not in v:
# this database does not have a "datadir"
continue
if v["datadir"] is not None:
click.secho(f'- {k} ({v["module"]}): "{v["datadir"]}"', fg="green")
else:
click.echo(f'- {k} ({v["module"]}): NOT installed')
def check(entry_point_group, split, limit): # numpydoc ignore=PR01
"""Check file access on one or more DataModules."""
import importlib.metadata
import sys
click.secho(f"Checking split `{split}`...", fg="yellow")
try:
module = importlib.metadata.entry_points(group=entry_point_group)[
split
].module
except KeyError:
raise Exception(f"Could not find database split `{split}`")
datamodule = importlib.import_module(module).datamodule
datamodule.model_transforms = [] # should be done before setup()
datamodule.batch_size = 1 # ensure one sample is loaded at a time
datamodule.setup("predict") # sets up all datasets
loaders = datamodule.predict_dataloader()
errors = 0
for k, loader in loaders.items():
if limit == 0:
click.secho(
f"Checking all samples of dataset `{k}` at split `{split}`...",
fg="yellow",
)
loader_limit = sys.maxsize
else:
click.secho(
f"Checking first {limit} samples of dataset "
f"`{k}` at split `{split}`...",
fg="yellow",
)
loader_limit = limit
# the for loop will trigger raw data loading (i.e. user code), protect
# it
try:
for i, batch in enumerate(loader):
if loader_limit == 0:
break
logger.info(
f"{batch[1]['name'][0]}: "
f"{[s for s in batch[0][0].shape]}@{batch[0][0].dtype}",
)
loader_limit -= 1
except Exception:
logger.exception(f"Unable to load batch {i} in dataset {k}")
errors += 1
if not errors:
click.secho(
f"OK! No errors were reported for database split `{split}`.",
fg="green",
)
else:
click.secho(
f"Found {errors} errors loading DataModule `{split}`.",
fg="red",
)
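To make the expected input of ``list_`` concrete, here is a small, hypothetical dictionary following the structure documented in its docstring (the second database name and all paths are placeholders):

from mednet.libs.common.scripts.database import list_ as list__

# hypothetical database configuration, in the format documented above
databases = {
    "montgomery": {
        "module": "mednet.libs.classification.data.montgomery",
        "datadir": "/path/to/montgomery/files",  # configured: printed in green
    },
    "exampledb": {
        "module": "mednet.libs.classification.data.exampledb",  # hypothetical
        "datadir": None,  # not configured: reported as NOT installed
    },
}
list__(databases)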
@@ -8,8 +8,8 @@ from clapper.click import AliasedGroup
from . import (
# analyze,
# compare,
# config,
# dataset,
config,
database,
# evaluate,
# experiment,
# mkmask,
@@ -30,8 +30,8 @@ def segmentation():
# segmentation.add_command(analyze.analyze)
# segmentation.add_command(compare.compare)
# segmentation.add_command(config.config)
# segmentation.add_command(dataset.dataset)
segmentation.add_command(config.config)
segmentation.add_command(database.database)
# segmentation.add_command(evaluate.evaluate)
# segmentation.add_command(experiment.experiment)
# segmentation.add_command(mkmask.mkmask)
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import click
from clapper.click import AliasedGroup, verbosity_option
from clapper.logging import setup
from mednet.libs.common.scripts.config import copy as copy_
from mednet.libs.common.scripts.config import describe as describe_
from mednet.libs.common.scripts.config import list_ as list__
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
@click.group(cls=AliasedGroup)
def config():
"""Command for listing, describing and copying configuration resources."""
pass
@config.command(
name="list",
epilog="""Examples:
\b
1. Lists all configuration resources (type: mednet.libs.segmentation.config) installed:
.. code:: sh
mednet config list
\b
2. Lists all configuration resources and their descriptions (notice this may
be slow as it needs to load all modules once):
.. code:: sh
mednet config list -v
""",
)
@verbosity_option(logger=logger)
def list_(verbose) -> None: # numpydoc ignore=PR01
"""List configuration files installed."""
list__("mednet.libs.segmentation.config", verbose)
@config.command(
epilog="""Examples:
\b
1. Describe the Montgomery dataset configuration:
.. code:: sh
mednet config describe montgomery
\b
2. Describe the Montgomery dataset configuration and list its
contents:
.. code:: sh
mednet config describe montgomery -v
""",
)
@click.argument(
"name",
required=True,
nargs=-1,
)
@verbosity_option(logger=logger)
def describe(name, verbose) -> None: # numpydoc ignore=PR01
"""Describe a specific configuration file."""
describe_(name, "mednet.libs.segmentation.config", verbose)
@config.command(
epilog="""Examples:
\b
1. Make a copy of one of the stock configuration files locally, so it can be
adapted:
.. code:: sh
$ mednet config copy montgomery -vvv newdataset.py
""",
)
@click.argument(
"source",
required=True,
nargs=1,
)
@click.argument(
"destination",
required=True,
nargs=1,
)
@verbosity_option(logger=logger, expose_value=False)
def copy(source, destination) -> None: # numpydoc ignore=PR01
"""Copy a specific configuration resource so it can be modified locally."""
copy_(source, destination, "mednet.libs.segmentation.config")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import click
from clapper.click import AliasedGroup, verbosity_option
from clapper.logging import setup
from mednet.libs.common.scripts.database import check as check_
from mednet.libs.common.scripts.database import list_ as list__
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
def _get_raw_databases() -> dict[str, dict[str, str]]:
"""Return a list of all supported (raw) databases.
Returns
-------
dict[str, dict[str, str]]
Dictionary where keys are database names, and values are dictionaries
containing two string keys:
* ``module``: the full Pythonic module name (e.g.
``mednet.libs.classification.data.montgomery``).
* ``datadir``: points to the user-configured data directory for the
current dataset, if set, or ``None`` otherwise.
"""
import importlib
import pkgutil
from ..config import data
from ..utils.rc import load_rc
user_configuration = load_rc()
retval = {}
for k in pkgutil.iter_modules(data.__path__):
for j in pkgutil.iter_modules(
[next(iter(data.__path__)) + f"/{k.name}"],
):
if j.name == "datamodule":
# this is a submodule that can read raw data files
module = importlib.import_module(
f".{j.name}",
data.__package__ + f".{k.name}",
)
if hasattr(module, "CONFIGURATION_KEY_DATADIR"):
retval[k.name] = dict(
module=module.__name__.rsplit(".", 1)[0],
datadir=user_configuration.get(
module.CONFIGURATION_KEY_DATADIR,
),
)
else:
retval[k.name] = dict(module=module.__name__)
return retval
@click.group(cls=AliasedGroup)
def database() -> None:
"""Command for listing and verifying databases installed."""
pass
@database.command(
name="list",
epilog="""Examples:
\b
1. To install a database, set up its data directory ("datadir"). For
example, to set up access to Montgomery files you downloaded locally at
the directory "/path/to/montgomery/files", edit the RC file (typically
``$HOME/.config/mednet.libs.classification.toml``), and add a line like the following:
.. code:: toml
[datadir]
montgomery = "/path/to/montgomery/files"
.. note::
This setting **is** case-sensitive.
\b
2. List all raw databases supported (and configured):
.. code:: sh
$ mednet database list
""",
)
@verbosity_option(logger=logger, expose_value=False)
def list_():
"""List all supported and configured databases."""
list__(_get_raw_databases())
@database.command(
epilog="""Examples:
1. Check if all files from the split 'montgomery-f0' of the Montgomery
database can be loaded:
.. code:: sh
mednet database check -vv montgomery-f0
""",
)
@click.argument(
"split",
nargs=1,
)
@click.option(
"--limit",
"-l",
help="Limit check to the first N samples in each split dataset, making the "
"check sensibly faster. Set it to zero (default) to check everything.",
required=True,
type=click.IntRange(0),
default=0,
)
@verbosity_option(logger=logger, expose_value=False)
def check(split, limit): # numpydoc ignore=PR01
"""Check file access on one or more DataModules."""
check_("mednet.libs.segmentation.config", split, limit)