diff --git a/bob/devtools/build.py b/bob/devtools/build.py index 5af4c12608b82c458fe8e9c463fd1c27c3a648de..631da43e2b11d1f996f1fb81e640620664150077 100644 --- a/bob/devtools/build.py +++ b/bob/devtools/build.py @@ -14,12 +14,23 @@ import platform import subprocess import logging - logger = logging.getLogger(__name__) import yaml import distutils.version + + +def remove_conda_loggers(): + """Cleans-up conda API logger handlers to avoid logging repetition""" + + z = logging.getLogger() #conda places their handlers inside root + if z.handlers: + handler = z.handlers[0] + z.removeHandler(handler) + logger.debug("Removed conda logger handler at %s", handler) + import conda_build.api +remove_conda_loggers() def comment_cleanup(lines): @@ -83,6 +94,7 @@ def next_build_number(channel_url, basename): """ from conda.exports import get_index + remove_conda_loggers() # get the channel index logger.debug("Downloading channel index from %s", channel_url) @@ -164,6 +176,7 @@ def make_conda_config(config, python, append_file, condarc_options): """ from conda_build.conda_interface import url_path + remove_conda_loggers() retval = conda_build.api.get_or_merge_config( None, diff --git a/bob/devtools/graph.py b/bob/devtools/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..c7fe00912ed1ce94c78e7663d316c1248e11be47 --- /dev/null +++ b/bob/devtools/graph.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Utilities for calculating package dependencies and drawing graphs""" + +import os +import re +import glob +import fnmatch +import tempfile +import tarfile +from io import BytesIO + +from .bootstrap import set_environment +from .build import ( + next_build_number, + get_rendered_metadata, + get_parsed_recipe, + get_output_path, +) +from .log import get_logger, echo_info + +logger = get_logger(__name__) + + +def compute_adjencence_matrix( + gl, + package, + conda_config, + main_channel, + recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", + current={}, + ref="master", + deptypes=[], +): + """ + Given a target package, returns an adjacence matrix with its dependencies + returned via the conda-build API + + Parameters + ---------- + + gl : object + Pre-instantiated instance of the gitlab server API to use, of type + :py:class:`gitlab.Gitlab`. + + package : str + Name of the package, including its group in the format + ``group/package`` + + conda_config : dict + Dictionary of conda configuration options loaded from command-line and + read from defaults available. + + main_channel : str + Main channel to consider when looking for the next build number of + the target package + + recurse_regexp : str + Regular expression to use, for determining where to recurse for + resolving dependencies. Typically, this should be set to a list of + packages which exists in gitlab. If it includes more than that, then + we may not be able to reach the package repository and an error will be + raised. The default expression avoids recursing over bob/beat-devel + packages. + + current : dict + Current list of packages already inspected - useful for recurrent calls + to this function, to avoid potential cyclic dependencies. Corresponds + to the current return value of this function. + + ref : str + Name of the git reference (branch, tag or commit hash) to use + + deptypes : list + A list of dependence types to preserve when building the graph. If + empty, then preserve all. You may set values "build", "host", + "run" and "test", in any combination + + + Returns + ------- + + adjacence_matrix : dict + A dictionary that contains the dependencies of all packages considered + in the recursion. The keys are the name of the packages, the values, + correspond to the dependencies (host, build, run and test) as a list of + strings. + + """ + + use_package = gl.projects.get(package) + deptypes = deptypes if deptypes else ["host", "build", "run", "test"] + + if use_package.attributes["path_with_namespace"] in current: + return current + + echo_info( + "Resolving graph for %s@%s" + % (use_package.attributes["path_with_namespace"], ref) + ) + with tempfile.TemporaryDirectory() as tmpdir: + + logger.debug("Downloading archive for %s...", ref) + archive = use_package.repository_archive(ref=ref) # in memory + logger.debug("Archive has %d bytes", len(archive)) + + with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f: + f.extractall(path=tmpdir) + + # use conda-build API to figure out all dependencies + recipe_dir = glob.glob(os.path.join(tmpdir, "*", "conda"))[0] + logger.debug("Resolving conda recipe for package at %s...", recipe_dir) + if not os.path.exists(recipe_dir): + raise RuntimeError( + "The conda recipe directory %s does not " "exist" % recipe_dir + ) + + version_candidate = os.path.join(recipe_dir, "..", "version.txt") + if os.path.exists(version_candidate): + version = open(version_candidate).read().rstrip() + set_environment("BOB_PACKAGE_VERSION", version) + + # pre-renders the recipe - figures out the destination + metadata = get_rendered_metadata(recipe_dir, conda_config) + rendered_recipe = get_parsed_recipe(metadata) + path = get_output_path(metadata, conda_config) + + # gets the next build number + build_number, _ = next_build_number( + main_channel, os.path.basename(path) + ) + + # at this point, all elements are parsed, I know the package version, + # build number and all dependencies + # exclude stuff we are not interested in + + # host and build should have precise numbers to be used for building + # this package. + if "host" not in deptypes: + host = [] + else: + host = rendered_recipe["requirements"].get("host", []) + + if "build" not in deptypes: + build = [] + else: + build = rendered_recipe["requirements"].get("build", []) + + # run dependencies are more vague + if "run" not in deptypes: + run = [] + else: + run = rendered_recipe["requirements"].get("run", []) + + # test dependencies even more vague + if "test" not in deptypes: + test = [] + else: + test = rendered_recipe.get("test", {}).get("requires", []) + + # for each of the above sections, recurse in figuring out dependencies, + # if dependencies match a target set of globs + recurse_compiled = re.compile(recurse_regexp) + + def _re_filter(l): + return [k for k in l if recurse_compiled.match(k)] + + all_recurse = set() + all_recurse |= set([z.split()[0] for z in _re_filter(host)]) + all_recurse |= set([z.split()[0] for z in _re_filter(build)]) + all_recurse |= set([z.split()[0] for z in _re_filter(run)]) + all_recurse |= set([z.split()[0] for z in _re_filter(test)]) + + # complete the package group, which is not provided by conda-build + def _add_default_group(p): + if p.startswith("bob") or p.startswith("gridtk"): + return "/".join(("bob", p)) + elif p.startswith("beat"): + return "/".join(("beat", p)) + elif p.startswith("batl"): + return "/".join(("batl", p)) + else: + logger.warning( + "Do not know how to recurse to package %s " + "(to which group does it belong?) - skipping...", + p, + ) + return None + + all_recurse = set([_add_default_group(k) for k in all_recurse]) + if None in all_recurse: + all_recurse.remove(None) + + # do not recurse for packages we already know + all_recurse -= set(current.keys()) + logger.info( + "Recursing over the following packages: %s", ", ".join(all_recurse) + ) + + for dep in all_recurse: + dep_adjmtx = compute_adjencence_matrix( + gl, + dep, + conda_config, + main_channel, + recurse_regexp=recurse_regexp, + ref=ref, + deptypes=deptypes, + ) + current.update(dep_adjmtx) + + current[package] = dict( + host=host, + build=build, + run=run, + test=test, + version=rendered_recipe["package"]["version"], + name=rendered_recipe["package"]["name"], + build_string=os.path.basename(path).split("-")[-1].split(".")[0], + ) + + return current + + +def generate_graph(adjacence_matrix, deptypes, whitelist): + """ + Computes a graphviz/dot representation of the build graph + + Parameters + ---------- + + adjacence_matrix : dict + A dictionary containing the adjacence matrix, that states the + dependencies for each package in the build, to other packages + + deptypes : list + A list of dependence types to preserve when building the graph. If + empty, then preserve all. You may set values "build", "host", + "run" and "test", in any combination + + whitelist : str + Regular expression for matching strings to preserve while building + the graph + + + Returns + ------- + + graph : graphviz.Digraph + The generated graph + + """ + + from graphviz import Digraph + + whitelist_compiled = re.compile(whitelist) + deptypes = deptypes if deptypes else ["host", "build", "run", "test"] + + graph = Digraph() + nodes = {} + + # generate nodes for all packages we want to track explicitly + for package, values in adjacence_matrix.items(): + if not whitelist_compiled.match(values["name"]): + logger.debug( + "Skipping main package %s (did not match whitelist)", + values["name"], + ) + continue + name = ( + values["name"] + + "\n" + + values["version"] + + "\n" + + values["build_string"] + ) + nodes[values["name"]] = graph.node( + values["name"], name, shape="box", color="blue" + ) + + # generates nodes for all dependencies + for package, values in adjacence_matrix.items(): + + # ensures we only have the most complete dependence in the our list + deps = {} + to_consider = set() + for k in deptypes: + to_consider |= set(values[k]) + for dep in to_consider: + name = dep.split()[0] + if name not in deps or (name in deps and not deps[name]): + deps[name] = dep.split()[1:] + + for ref, parts in deps.items(): + if not whitelist_compiled.match(ref): + logger.debug( + "Skipping dependence %s (did not match whitelist)", ref + ) + continue + + if not any([k == ref for k in nodes.keys()]): + # we do not have a node for that dependence, create it + name = str(ref) # new string + if len(parts) >= 1: + name += "\n" + parts[0] # dep version + if len(parts) >= 2: + name += "\n" + parts[1] # dep build + nodes[ref] = graph.node(ref, name) + + # connects package -> dep + graph.edge(values["name"], ref) + + return graph diff --git a/bob/devtools/scripts/build.py b/bob/devtools/scripts/build.py index 263b98f58838f49e36e32f94a77e0dd924a4c584..feac1b0a145684cc528846efe4910b3b038f027f 100644 --- a/bob/devtools/scripts/build.py +++ b/bob/devtools/scripts/build.py @@ -20,7 +20,10 @@ from ..build import ( get_docserver_setup, get_env_directory, get_output_path, + remove_conda_loggers, ) +remove_conda_loggers() + from ..constants import ( CONDA_BUILD_CONFIG, CONDA_RECIPE_APPEND, @@ -182,7 +185,7 @@ def build( from bob.devtools.bootstrap import do_hack project_dir = os.path.dirname(recipe_dir[0]) do_hack(project_dir) - + # get potential channel upload and other auxiliary channels channels = get_channels( @@ -235,7 +238,7 @@ def build( for d in recipe_dir: if not os.path.exists(d): - raise RuntimeError("The directory %s does not exist" % recipe_dir) + raise RuntimeError("The directory %s does not exist" % d) version_candidate = os.path.join(d, "..", "version.txt") if os.path.exists(version_candidate): diff --git a/bob/devtools/scripts/graph.py b/bob/devtools/scripts/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..12f654bf43a7be771c41e0f558a31d669d2f6f0f --- /dev/null +++ b/bob/devtools/scripts/graph.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + +import yaml +import click +from click_plugins import with_plugins + +from . import bdt +from ..constants import ( + CONDA_BUILD_CONFIG, + CONDA_RECIPE_APPEND, + SERVER, + MATPLOTLIB_RCDIR, + BASE_CONDARC, +) +from ..build import make_conda_config +from ..bootstrap import set_environment, get_channels +from ..release import get_gitlab_instance +from ..graph import compute_adjencence_matrix, generate_graph + +from ..log import verbosity_option, get_logger, echo_info +logger = get_logger(__name__) + + +@click.command( + epilog=""" +Examples: + + 1. Calculates and draws the graph of a package: + + $ bdt gitlab graph bob/bob.blitz + + 2. Calculates and draws only the runtime dependencies of a package + + $ bdt gitlab graph bob/bob.blitz --deptypes=run + +\b + 3. Calculates run and test dependencies of package, but only draws a subset + defined by a regular expression + +\b + $ bdt gitlab graph beat/beat.editor --deptypes=run --deptypes=test --whitelist='^beat\.(editor|cmdline).*$' + +""" +) +@click.argument("package", required=True) +@click.option( + "-p", + "--python", + default=("%d.%d" % sys.version_info[:2]), + show_default=True, + help="Version of python to build the environment for", +) +@click.option( + "-r", + "--condarc", + help="Use custom conda configuration file instead of our own", +) +@click.option( + "-m", + "--config", + "--variant-config-files", + show_default=True, + default=CONDA_BUILD_CONFIG, + help="overwrites the path leading to " "variant configuration file to use", +) +@click.option( + "-a", + "--append-file", + show_default=True, + default=CONDA_RECIPE_APPEND, + help="overwrites the path leading to " "appended configuration file to use", +) +@click.option( + "-S", + "--server", + show_default=True, + default=SERVER, + help="Server used for downloading conda packages and documentation " + "indexes of required packages", +) +@click.option( + "-P", + "--private/--no-private", + default=False, + help="Set this to **include** private channels on your search - " + "you **must** be at Idiap to execute this build in this case - " + "you **must** also use the correct server name through --server - " + "notice this option has no effect to conda if you also pass --condarc", +) +@click.option( + "-X", + "--stable/--no-stable", + default=False, + help="Set this to **exclude** beta channels from your build - " + "notice this option has no effect if you also pass --condarc", +) +@click.option( + "-C", + "--ci/--no-ci", + default=False, + hidden=True, + help="Use this flag to indicate the graph will be running on the CI", +) +@click.option( + "-n", + "--name", + show_default=True, + default="graph", + help="set the graph name", +) +@click.option( + "-f", + "--format", + show_default=True, + default="svg", + help="determines the type of output to expect", +) +@click.option( + "-w", + "--whitelist", + show_default=True, + default="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", + help="package regular expression to preserve in the graph, " + "use .* for keeping all packages, including non-maintained ones. The " + "current expression accepts most of our packages, excluding " + "bob/beat-devel. This flag only affects the graph generation - we still " + "recurse over all packages to calculate dependencies.") +@click.option( + "-d", + "--deptypes", + show_default=True, + default=[], + multiple=True, + help="types of dependencies to consider. Pass multiple times to include " + "more types. Valid types are 'host', 'build', 'run' and 'test'. An " + "empty set considers all dependencies to the graph") +@verbosity_option() +@bdt.raise_on_error +def graph(package, python, condarc, config, append_file, server, private, + stable, ci, name, format, whitelist, deptypes): + """ + Computes the dependency graph of a gitlab package (via its conda recipe) + and outputs an dot file that can be used by graphviz to draw a direct + acyclic graph (DAG) of package dependencies. + + This command uses the conda-build API to resolve the package dependencies. + """ + + if "/" not in package: + raise RuntimeError('PACKAGE should be specified as "group/name"') + + package_group, package_name = package.split('/', 1) + + gl = get_gitlab_instance() + + # get potential channel upload and other auxiliary channels + channels = get_channels( + public=(not private), + stable=stable, + server=server, + intranet=ci, + group=package_group, + ) + + if condarc is not None: + logger.info("Loading CONDARC file from %s...", condarc) + with open(condarc, "rb") as f: + condarc_options = yaml.load(f, Loader=yaml.FullLoader) + else: + # use default and add channels + all_channels = [] + all_channels += channels + ["defaults"] + condarc_options = yaml.load(BASE_CONDARC, Loader=yaml.FullLoader) + logger.info( + "Using the following channels during build:\n - %s", + "\n - ".join(all_channels), + ) + condarc_options["channels"] = all_channels + + conda_config = make_conda_config( + config, python, append_file, condarc_options + ) + + set_environment("MATPLOTLIBRC", MATPLOTLIB_RCDIR) + + # setup BOB_DOCUMENTATION_SERVER environment variable (used for bob.extension + # and derived documentation building via Sphinx) + set_environment("DOCSERVER", server) + set_environment("BOB_DOCUMENTATION_SERVER", "/not/set") + + adj_matrix = compute_adjencence_matrix(gl, package, conda_config, + channels[0], deptypes=deptypes) + + graph = generate_graph(adj_matrix, deptypes=deptypes, whitelist=whitelist) + graph.render(name, format=format, cleanup=True) + diff --git a/bob/devtools/scripts/test.py b/bob/devtools/scripts/test.py index d1f28ae96f0920b8db886050cfb5c92a5e8cb71f..b000ca8a41e2eb5868db90f93fae47b3b18f9b43 100644 --- a/bob/devtools/scripts/test.py +++ b/bob/devtools/scripts/test.py @@ -15,7 +15,10 @@ from ..build import ( make_conda_config, get_docserver_setup, get_env_directory, + remove_conda_loggers, ) +remove_conda_loggers() + from ..constants import ( CONDA_BUILD_CONFIG, CONDA_RECIPE_APPEND, diff --git a/conda/meta.yaml b/conda/meta.yaml index 43c37974c39fb2029f9895f6ff8013d2e7979b68..838772de640254d1c2241400fb250a8ebc3aaad2 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -49,6 +49,7 @@ requirements: - termcolor - psutil - tabulate + - python-graphviz test: requires: @@ -104,6 +105,7 @@ test: - bdt dav upload --help - bdt gitlab process-pipelines --help - bdt gitlab get-pipelines --help + - bdt gitlab graph --help - sphinx-build -aEW ${PREFIX}/share/doc/{{ name }}/doc sphinx - if [ -n "${CI_PROJECT_DIR}" ]; then mv sphinx "${CI_PROJECT_DIR}/"; fi diff --git a/doc/api.rst b/doc/api.rst index 4b9a776a4b77535cb563ff196b1b6bd1c0aa8d2b..3132a59ac2f33da4ac8032eea12f4d23eb98aef4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -14,6 +14,8 @@ bob.devtools.bootstrap bob.devtools.build bob.devtools.mirror + bob.devtools.deploy + bob.devtools.graph bob.devtools.webdav3.client @@ -34,8 +36,12 @@ Detailed Information .. automodule:: bob.devtools.build +.. automodule:: bob.devtools.mirror + .. automodule:: bob.devtools.deploy +.. automodule:: bob.devtools.graph + WebDAV Python Client -------------------- diff --git a/setup.py b/setup.py index 1e729138cb3b20b99f87804623628cb94586277e..f9ac0ca9b2d85691ec5c03520cae6c51d41e0db8 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,8 @@ setup( 'visibility = bob.devtools.scripts.visibility:visibility', 'getpath = bob.devtools.scripts.getpath:getpath', 'process-pipelines = bob.devtools.scripts.pipelines:process_pipelines', - 'get-pipelines- = bob.devtools.scripts.pipelines:get_pipelines', + 'get-pipelines- = bob.devtools.scripts.pipelines:get_pipelines', + 'graph = bob.devtools.scripts.graph:graph' ], 'bdt.ci.cli': [