From 5e281f9724a7a7faeb7659d195e88578f2c3f765 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Fri, 15 Nov 2019 09:37:08 +0100 Subject: [PATCH] [graph] Re-implement dependence graph generation as per my instructions to @tiago.pereira --- bob/devtools/graph.py | 314 ++++++++++++++++++++++++---------- bob/devtools/scripts/build.py | 7 +- bob/devtools/scripts/graph.py | 172 ++++++++++++++++--- bob/devtools/scripts/test.py | 3 + 4 files changed, 382 insertions(+), 114 deletions(-) diff --git a/bob/devtools/graph.py b/bob/devtools/graph.py index 70077ef2..059d0190 100644 --- a/bob/devtools/graph.py +++ b/bob/devtools/graph.py @@ -1,120 +1,250 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import conda.cli.python_api -import json +import os +import re +import glob +import fnmatch +import tempfile +import tarfile +from io import BytesIO + +from .bootstrap import set_environment +from .build import ( + next_build_number, + get_rendered_metadata, + get_parsed_recipe, + get_output_path, +) +from .log import get_logger +logger = get_logger(__name__) -from .log import verbosity_option, get_logger, echo_info -logger = get_logger(__name__) +def compute_adjencence_matrix(gl, package, conda_config, main_channel, + recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", current={}, + ref="master"): + """ + Given a target package, returns an adjacence matrix with its dependencies + returned via the conda-build API + Parameters + ---------- -from graphviz import Digraph + gl : object + Pre-instantiated instance of the gitlab server API to use, of type + :py:class:`gitlab.Gitlab`. 
+ package : str + Name of the package, including its group in the format + ``group/package`` -def get_graphviz_dependency_graph( - graph_dict, - file_name, - prefix="bob.", - black_list=["python", "setuptools", "libcxx", "numpy", "libblitz", "boost"], -): - """ - Given a dictionary with the dependency graph, compute the graphviz DAG and save it - in SVG - """ + conda_config : dict + Dictionary of conda configuration options loaded from command-line and + read from defaults available. - d = Digraph(format="svg", engine="dot") + main_channel : str + Main channel to consider when looking for the next build number of + the target package - for i in graph_dict: - for j in graph_dict[i]: - # Conections to python, setuptools....gets very messy - if j in black_list: - continue + recurse_regexp : str + Regular expression to use, for determining where to recurse for + resolving dependencies. Typically, this should be set to a list of + packages which exist in gitlab. If it includes more than that, then + we may not be able to reach the package repository and an error will be + raised. The default expression avoids recursing over bob/beat-devel + packages. + + current : dict + Current list of packages already inspected - useful for recursive calls + to this function, to avoid potential cyclic dependencies. Corresponds + to the current return value of this function. + + ref : str + Name of the git reference (branch, tag or commit hash) to use - if prefix in j: - d.attr("node", shape="box") - else: - d.attr("node", shape="ellipse") - d.edge(i, j) - d.render(file_name) + Returns + ------- + + adjacence_matrix : dict + A dictionary that contains the dependencies of all packages considered + in the recursion. The keys are the names of the packages; the values + are dictionaries holding the dependencies (host, build, run and test) as + lists of strings, plus the package name, version and build string. 
-def compute_dependency_graph( - package_name, channel=None, selected_packages=[], prefix="bob.", dependencies=dict() -): """ - Given a target package, returns an adjacency matrix with its dependencies returned via the command `conda search xxxx --info` - **Parameters** - - package_name: - Name of the package - - channel: - Name of the channel to be sent via `-c` option. If None `conda search` will use what is in .condarc + use_package = gl.projects.get(package) + + logger.info('Resolving graph for %s@%s', + use_package.attributes["path_with_namespace"], ref) + with tempfile.TemporaryDirectory() as tmpdir: + + logger.debug('Downloading archive for %s...', ref) + archive = use_package.repository_archive(ref=ref) #in memory + logger.debug("Archive has %d bytes", len(archive)) + + with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f: + f.extractall(path=tmpdir) + + # use conda-build API to figure out all dependencies + recipe_dir = glob.glob(os.path.join(tmpdir, '*', 'conda'))[0] + logger.debug('Resolving conda recipe for package at %s...', recipe_dir) + if not os.path.exists(recipe_dir): + raise RuntimeError("The conda recipe directory %s does not " \ + "exist" % recipe_dir) + + version_candidate = os.path.join(recipe_dir, "..", "version.txt") + if os.path.exists(version_candidate): + version = open(version_candidate).read().rstrip() + set_environment("BOB_PACKAGE_VERSION", version) + + # pre-renders the recipe - figures out the destination + metadata = get_rendered_metadata(recipe_dir, conda_config) + rendered_recipe = get_parsed_recipe(metadata) + path = get_output_path(metadata, conda_config) + + # gets the next build number + build_number, _ = next_build_number(main_channel, + os.path.basename(path)) + + # at this point, all elements are parsed, I know the package version, + # build number and all dependencies + + # host and build should have precise numbers to be used for building + # this package. 
+ host = rendered_recipe['requirements'].get('host', []) + build = rendered_recipe['requirements'].get('build', []) + + # run dependencies are more vague + run = rendered_recipe['requirements'].get('run', []) + + # test dependencies even more vague + test = rendered_recipe.get('test', {}).get('requires', []) + + # for each of the above sections, recurse in figuring out dependencies, + # if dependencies match a target set of globs + recurse_compiled = re.compile(recurse_regexp) + def _re_filter(l): + return [k for k in l if recurse_compiled.match(k)] + host_recurse = set([z.split()[0] for z in _re_filter(host)]) + build_recurse = set([z.split()[0] for z in _re_filter(build)]) + run_recurse = set([z.split()[0] for z in _re_filter(run)]) + test_recurse = set([z.split()[0] for z in _re_filter(test)]) + + # we do not separate host/build/run/test dependencies and assume they + # will all be of the same version in the end. Otherwise, we would need + # to do this in a bit more careful way. + all_recurse = host_recurse | build_recurse | run_recurse | test_recurse + + # complete the package group, which is not provided by conda-build + def _add_default_group(p): + if p.startswith('bob') or p.startswith('gridtk'): + return '/'.join(('bob', p)) + elif p.startswith('beat'): + return '/'.join(('beat', p)) + elif p.startswith('batl'): + return '/'.join(('batl', p)) + else: + raise RuntimeError('Do not know how to recurse to package %s' \ + % (p,)) + all_recurse = set([_add_default_group(k) for k in all_recurse]) - selected_packages: - List of target packages. If set, the returned adjacency matrix will be in terms of this list. + # do not recurse for packages we already know + all_recurse -= set(current.keys()) + logger.debug("Recursing over the following packages: %s", + ", ".join(all_recurse)) - prefix: - Only seach for deep dependencies under the prefix. This would avoid to go deeper in - dependencies not maintained by us, such as, numpy, matplotlib, etc.. 
+ for dep in all_recurse: + dep_adjmtx = compute_adjencence_matrix(gl, dep, conda_config, + main_channel, recurse_regexp=recurse_regexp, ref=ref) + current.update(dep_adjmtx) - dependencies: - Dictionary controlling the state of each search + current[package] = dict(host=host, build=build, run=run, test=test, + version=rendered_recipe["package"]["version"], + name=rendered_recipe["package"]["name"], + build_string=os.path.basename(path).split('-')[-1].split('.')[0]) + return current + + +def generate_graph(adjacence_matrix, deptypes, whitelist): """ + Computes a graphviz/dot representation of the build graph - if package_name in dependencies: - return dependencies + Parameters + ---------- - dependencies[package_name] = fetch_dependencies( - package_name, channel, selected_packages - ) - logger.info(f" >> Searching dependencies of {package_name}") - for d in dependencies[package_name]: - if prefix in d: - compute_dependency_graph( - d, channel, selected_packages, prefix, dependencies - ) - return dependencies + adjacence_matrix : dict + A dictionary containing the adjacence matrix, that states the + dependencies for each package in the build, to other packages + deptypes : list + A list of dependence types to preserve when building the graph. If + empty, then preserve all. 
You may set values "build", "host", + "run" and "test", in any combination -def fetch_dependencies(package_name, channel=None, selected_packages=[]): - """ - conda search the dependencies of a package + whitelist : str + Regular expression for matching strings to preserve while building + the graph + + + Returns + ------- + + graph : graphviz.Digraph + The generated graph - **Parameters** - packge_name: - channel: - selected_packages: """ - # Running conda search and returns to a json file - if channel is None: - package_description = conda.cli.python_api.run_command( - conda.cli.python_api.Commands.SEARCH, package_name, "--info", "--json" - ) - else: - package_description = conda.cli.python_api.run_command( - conda.cli.python_api.Commands.SEARCH, - package_name, - "--info", - "-c", - channel, - "--json", - ) - - # TODO: Fix that - package_description = json.loads(package_description[0]) - - # Fetching the dependencies of the most updated package - all_dependencies = [ - p.split(" ")[0] for p in package_description[package_name][-1]["depends"] - ] - - if len(selected_packages) > 0: - # Filtering the dependencies - return [d for d in selected_packages if d in all_dependencies] - - return all_dependencies + from graphviz import Digraph + + whitelist_compiled = re.compile(whitelist) + deptypes = deptypes if deptypes else ["host", "build", "run", "test"] + + graph = Digraph() + nodes = {} + + # generate nodes for all packages we want to track explicitly + for package, values in adjacence_matrix.items(): + if not whitelist_compiled.match(values["name"]): + logger.debug("Skipping main package %s (did not match whitelist)", + values["name"]) + continue + name = values["name"] + "\n" + values["version"] + "\n" \ + + values["build_string"] + nodes[values["name"]] = graph.node(values["name"], name, shape="box", + color="blue") + + # generates nodes for all dependencies + for package, values in adjacence_matrix.items(): + + # ensures we only have the most complete dependence 
in our list + deps = {} + to_consider = set() + for k in deptypes: + to_consider |= set(values[k]) + for dep in to_consider: + name = dep.split()[0] + if name not in deps or (name in deps and not deps[name]): + deps[name] = dep.split()[1:] + + for ref, parts in deps.items(): + if not whitelist_compiled.match(ref): + logger.debug("Skipping dependence %s (did not match whitelist)", + ref) + continue + + if not any([k == ref for k in nodes.keys()]): + # we do not have a node for that dependence, create it + name = str(ref) #new string + if len(parts) >= 1: + name += "\n" + parts[0] #dep version + if len(parts) >= 2: + name += "\n" + parts[1] #dep build + nodes[ref] = graph.node(ref, name) + + # connects package -> dep + graph.edge(values["name"], ref) + + return graph diff --git a/bob/devtools/scripts/build.py b/bob/devtools/scripts/build.py index 263b98f5..feac1b0a 100644 --- a/bob/devtools/scripts/build.py +++ b/bob/devtools/scripts/build.py @@ -20,7 +20,10 @@ from ..build import ( get_docserver_setup, get_env_directory, get_output_path, + remove_conda_loggers, ) +remove_conda_loggers() + from ..constants import ( CONDA_BUILD_CONFIG, CONDA_RECIPE_APPEND, @@ -182,7 +185,7 @@ def build( from bob.devtools.bootstrap import do_hack project_dir = os.path.dirname(recipe_dir[0]) do_hack(project_dir) - + # get potential channel upload and other auxiliary channels channels = get_channels( @@ -235,7 +238,7 @@ def build( for d in recipe_dir: if not os.path.exists(d): - raise RuntimeError("The directory %s does not exist" % recipe_dir) + raise RuntimeError("The directory %s does not exist" % d) version_candidate = os.path.join(d, "..", "version.txt") if os.path.exists(version_candidate): diff --git a/bob/devtools/scripts/graph.py b/bob/devtools/scripts/graph.py index 3415eacb..ffc790c4 100644 --- a/bob/devtools/scripts/graph.py +++ b/bob/devtools/scripts/graph.py @@ -1,45 +1,177 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import sys + +import yaml import click from 
click_plugins import with_plugins -from ..graph import compute_dependency_graph, get_graphviz_dependency_graph +from . import bdt +from ..constants import ( + CONDA_BUILD_CONFIG, + CONDA_RECIPE_APPEND, + SERVER, + MATPLOTLIB_RCDIR, + BASE_CONDARC, +) +from ..build import make_conda_config +from ..bootstrap import set_environment, get_channels +from ..release import get_gitlab_instance +from ..graph import compute_adjencence_matrix, generate_graph from ..log import verbosity_option, get_logger, echo_info - logger = get_logger(__name__) @click.command( epilog=""" -Example: +Examples: - bdt graph bob.bio.face graph + 1. Draws the graph of a package + + $ bdt gitlab graph bob/bob.bio.face """ ) -@click.argument("package_name", required=True) -@click.argument("output_file", required=True) +@click.argument("package", required=True) @click.option( - "-c", - "--channel", - default=None, - help="Define a target channel for conda serch. If not set, will use what is set in .condarc", + "-p", + "--python", + default=("%d.%d" % sys.version_info[:2]), + show_default=True, + help="Version of python to build the environment for", ) @click.option( - "-p", - "--prefix", - default="bob.", - help="It will recursivelly look into dependencies whose package name matches the prefix. 
Default 'bob.'", + "-r", + "--condarc", + help="Use custom conda configuration file instead of our own", +) +@click.option( + "-m", + "--config", + "--variant-config-files", + show_default=True, + default=CONDA_BUILD_CONFIG, + help="overwrites the path leading to " "variant configuration file to use", +) +@click.option( + "-a", + "--append-file", + show_default=True, + default=CONDA_RECIPE_APPEND, + help="overwrites the path leading to " "appended configuration file to use", +) +@click.option( + "-S", + "--server", + show_default=True, + default=SERVER, + help="Server used for downloading conda packages and documentation " + "indexes of required packages", +) +@click.option( + "-P", + "--private/--no-private", + default=False, + help="Set this to **include** private channels on your search - " + "you **must** be at Idiap to execute this build in this case - " + "you **must** also use the correct server name through --server - " + "notice this option has no effect to conda if you also pass --condarc", +) +@click.option( + "-X", + "--stable/--no-stable", + default=False, + help="Set this to **exclude** beta channels from your build - " + "notice this option has no effect if you also pass --condarc", +) +@click.option( + "-C", + "--ci/--no-ci", + default=False, + hidden=True, + help="Use this flag to indicate the graph will be running on the CI", +) +@click.option( + "-n", + "--name", + show_default=True, + default="graph", + help="set the graph name", +) +@click.option( + "-f", + "--format", + show_default=True, + default="svg", + help="determines the type of output to expect", ) +@click.option( + "-w", + "--whitelist", + show_default=True, + default="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", + help="package regular expression to preserve in the graph, " + "use .* for keeping all packages, including non-maintained ones. 
The " + "current expression accepts most of our packages, excluding bob/beat-devel") @verbosity_option() -def graph(package_name, output_file, channel, prefix): +@bdt.raise_on_error +def graph(package, python, condarc, config, append_file, server, private, + stable, ci, name, format, whitelist): """ - Compute the dependency graph of a conda package and save it in an SVG file using graphviz. + Computes the dependency graph of a gitlab package (via its conda recipe) + and outputs a dot file that can be used by graphviz to draw a directed + acyclic graph (DAG) of package dependencies. + + This command uses the conda-build API to resolve the package dependencies. """ - logger.info(f"Computing dependency graph") - graph_dict = compute_dependency_graph(package_name, channel=channel, prefix=prefix) - logger.info("Generating SVG") - get_graphviz_dependency_graph(graph_dict, output_file, prefix=prefix) + + if "/" not in package: + raise RuntimeError('PACKAGE should be specified as "group/name"') + + package_group, package_name = package.split('/', 1) + + gl = get_gitlab_instance() + + # get potential channel upload and other auxiliary channels + channels = get_channels( + public=(not private), + stable=stable, + server=server, + intranet=ci, + group=package_group, + ) + + if condarc is not None: + logger.info("Loading CONDARC file from %s...", condarc) + with open(condarc, "rb") as f: + condarc_options = yaml.load(f, Loader=yaml.FullLoader) + else: + # use default and add channels + all_channels = [] + all_channels += channels + ["defaults"] + condarc_options = yaml.load(BASE_CONDARC, Loader=yaml.FullLoader) + logger.info( + "Using the following channels during build:\n - %s", + "\n - ".join(all_channels), + ) + condarc_options["channels"] = all_channels + + conda_config = make_conda_config( + config, python, append_file, condarc_options + ) + + set_environment("MATPLOTLIBRC", MATPLOTLIB_RCDIR) + + # setup BOB_DOCUMENTATION_SERVER environment variable (used for bob.extension 
+ # and derived documentation building via Sphinx) + set_environment("DOCSERVER", server) + set_environment("BOB_DOCUMENTATION_SERVER", "/not/set") + + adj_matrix = compute_adjencence_matrix(gl, package, conda_config, + channels[0]) + + graph = generate_graph(adj_matrix, deptypes=[], whitelist=whitelist) + graph.render(name, format=format, cleanup=True) diff --git a/bob/devtools/scripts/test.py b/bob/devtools/scripts/test.py index d1f28ae9..b000ca8a 100644 --- a/bob/devtools/scripts/test.py +++ b/bob/devtools/scripts/test.py @@ -15,7 +15,10 @@ from ..build import ( make_conda_config, get_docserver_setup, get_env_directory, + remove_conda_loggers, ) +remove_conda_loggers() + from ..constants import ( CONDA_BUILD_CONFIG, CONDA_RECIPE_APPEND, -- GitLab