Skip to content
Snippets Groups Projects
Commit 5e281f97 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[graph] Re-implement dependence graph generation as per my instructions to @tiago.pereira

parent 018e572d
No related branches found
No related tags found
Part of merge request !126: "Implemented a mechanism to run a dependency graph".
Comments created here will be created in the context of that merge request.
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import conda.cli.python_api import os
import json import re
import glob
import fnmatch
import tempfile
import tarfile
from io import BytesIO
from .bootstrap import set_environment
from .build import (
next_build_number,
get_rendered_metadata,
get_parsed_recipe,
get_output_path,
)
from .log import get_logger
logger = get_logger(__name__)
from .log import verbosity_option, get_logger, echo_info
logger = get_logger(__name__) def compute_adjencence_matrix(gl, package, conda_config, main_channel,
recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", current={},
ref="master"):
"""
Given a target package, returns an adjacence matrix with its dependencies
returned via the conda-build API
Parameters
----------
from graphviz import Digraph gl : object
Pre-instantiated instance of the gitlab server API to use, of type
:py:class:`gitlab.Gitlab`.
package : str
Name of the package, including its group in the format
``group/package``
def get_graphviz_dependency_graph( conda_config : dict
graph_dict, Dictionary of conda configuration options loaded from command-line and
file_name, read from defaults available.
prefix="bob.",
black_list=["python", "setuptools", "libcxx", "numpy", "libblitz", "boost"],
):
"""
Given a dictionary with the dependency graph, compute the graphviz DAG and save it
in SVG
"""
d = Digraph(format="svg", engine="dot") main_channel : str
Main channel to consider when looking for the next build number of
the target package
for i in graph_dict: recurse_regexp : str
for j in graph_dict[i]: Regular expression to use, for determining where to recurse for
# Conections to python, setuptools....gets very messy resolving dependencies. Typically, this should be set to a list of
if j in black_list: packages which exists in gitlab. If it includes more than that, then
continue we may not be able to reach the package repository and an error will be
raised. The default expression avoids recursing over bob/beat-devel
packages.
current : dict
Current list of packages already inspected - useful for recurrent calls
to this function, to avoid potential cyclic dependencies. Corresponds
to the current return value of this function.
ref : str
Name of the git reference (branch, tag or commit hash) to use
if prefix in j:
d.attr("node", shape="box")
else:
d.attr("node", shape="ellipse")
d.edge(i, j)
d.render(file_name)
Returns
-------
adjacence_matrix : dict
A dictionary that contains the dependencies of all packages considered
in the recursion. The keys are the name of the packages, the values,
correspond to the dependencies (host, build, run and test) as a list of
strings.
def compute_dependency_graph(
package_name, channel=None, selected_packages=[], prefix="bob.", dependencies=dict()
):
""" """
Given a target package, returns an adjacency matrix with its dependencies returned via the command `conda search xxxx --info`
**Parameters** use_package = gl.projects.get(package)
package_name: logger.info('Resolving graph for %s@%s',
Name of the package use_package.attributes["path_with_namespace"], ref)
with tempfile.TemporaryDirectory() as tmpdir:
channel:
Name of the channel to be sent via `-c` option. If None `conda search` will use what is in .condarc logger.debug('Downloading archive for %s...', ref)
archive = use_package.repository_archive(ref=ref) #in memory
logger.debug("Archive has %d bytes", len(archive))
with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f:
f.extractall(path=tmpdir)
# use conda-build API to figure out all dependencies
recipe_dir = glob.glob(os.path.join(tmpdir, '*', 'conda'))[0]
logger.debug('Resolving conda recipe for package at %s...', recipe_dir)
if not os.path.exists(recipe_dir):
raise RuntimeError("The conda recipe directory %s does not " \
"exist" % recipe_dir)
version_candidate = os.path.join(recipe_dir, "..", "version.txt")
if os.path.exists(version_candidate):
version = open(version_candidate).read().rstrip()
set_environment("BOB_PACKAGE_VERSION", version)
# pre-renders the recipe - figures out the destination
metadata = get_rendered_metadata(recipe_dir, conda_config)
rendered_recipe = get_parsed_recipe(metadata)
path = get_output_path(metadata, conda_config)
# gets the next build number
build_number, _ = next_build_number(main_channel,
os.path.basename(path))
# at this point, all elements are parsed, I know the package version,
# build number and all dependencies
# host and build should have precise numbers to be used for building
# this package.
host = rendered_recipe['requirements'].get('host', [])
build = rendered_recipe['requirements'].get('build', [])
# run dependencies are more vague
run = rendered_recipe['requirements'].get('run', [])
# test dependencies even more vague
test = rendered_recipe.get('test', {}).get('requires', [])
# for each of the above sections, recurse in figuring out dependencies,
# if dependencies match a target set of globs
recurse_compiled = re.compile(recurse_regexp)
def _re_filter(l):
return [k for k in l if recurse_compiled.match(k)]
host_recurse = set([z.split()[0] for z in _re_filter(host)])
build_recurse = set([z.split()[0] for z in _re_filter(build)])
run_recurse = set([z.split()[0] for z in _re_filter(run)])
test_recurse = set([z.split()[0] for z in _re_filter(test)])
# we do not separate host/build/run/test dependencies and assume they
# will all be of the same version in the end. Otherwise, we would need
# to do this in a bit more careful way.
all_recurse = host_recurse | build_recurse | run_recurse | test_recurse
# complete the package group, which is not provided by conda-build
def _add_default_group(p):
if p.startswith('bob') or p.startswith('gridtk'):
return '/'.join(('bob', p))
elif p.startswith('beat'):
return '/'.join(('beat', p))
elif p.startswith('batl'):
return '/'.join(('batl', p))
else:
raise RuntimeError('Do not know how to recurse to package %s' \
% (p,))
all_recurse = set([_add_default_group(k) for k in all_recurse])
selected_packages: # do not recurse for packages we already know
List of target packages. If set, the returned adjacency matrix will be in terms of this list. all_recurse -= set(current.keys())
logger.debug("Recursing over the following packages: %s",
", ".join(all_recurse))
prefix: for dep in all_recurse:
Only seach for deep dependencies under the prefix. This would avoid to go deeper in dep_adjmtx = compute_adjencence_matrix(gl, dep, conda_config,
dependencies not maintained by us, such as, numpy, matplotlib, etc.. main_channel, recurse_regexp=recurse_regexp, ref=ref)
current.update(dep_adjmtx)
dependencies: current[package] = dict(host=host, build=build, run=run, test=test,
Dictionary controlling the state of each search version=rendered_recipe["package"]["version"],
name=rendered_recipe["package"]["name"],
build_string=os.path.basename(path).split('-')[-1].split('.')[0])
return current
def generate_graph(adjacence_matrix, deptypes, whitelist):
  """Computes a graphviz/dot representation of the build graph

  Parameters
  ----------
  adjacence_matrix : dict
      A dictionary containing the adjacence matrix, that states the
      dependencies for each package in the build, to other packages
  deptypes : list
      A list of dependence types to preserve when building the graph.  If
      empty, then preserve all.  You may set values "build", "host",
      "run" and "test", in any combination
  whitelist : str
      Regular expression for matching strings to preserve while building
      the graph

  Returns
  -------
  graph : graphviz.Digraph
      The generated graph
  """

  from graphviz import Digraph

  whitelist_compiled = re.compile(whitelist)
  deptypes = deptypes if deptypes else ["host", "build", "run", "test"]

  graph = Digraph()
  nodes = {}

  # generate nodes for all packages we want to track explicitly
  for package, values in adjacence_matrix.items():
    if not whitelist_compiled.match(values["name"]):
      # fixed: previously logged ``value["name"]`` (undefined name), which
      # raised a NameError whenever a main package was filtered out
      logger.debug("Skipping main package %s (did not match whitelist)",
          values["name"])
      continue
    name = values["name"] + "\n" + values["version"] + "\n" \
        + values["build_string"]
    nodes[values["name"]] = graph.node(values["name"], name, shape="box",
        color="blue")

  # generates nodes for all dependencies
  for package, values in adjacence_matrix.items():

    # ensures we only keep the most complete specification for each
    # dependence name (one with version/build parts, when available)
    deps = {}
    to_consider = set()
    for k in deptypes:
      to_consider |= set(values[k])
    for dep in to_consider:
      name = dep.split()[0]
      if name not in deps or not deps[name]:
        deps[name] = dep.split()[1:]

    for ref, parts in deps.items():
      if not whitelist_compiled.match(ref):
        logger.debug("Skipping dependence %s (did not match whitelist)",
            ref)
        continue

      if ref not in nodes:  # O(1) membership test (was a linear scan)
        # we do not have a node for that dependence, create it
        name = str(ref)  #new string
        if len(parts) >= 1:
          name += "\n" + parts[0]  #dep version
        if len(parts) >= 2:
          name += "\n" + parts[1]  #dep build
        nodes[ref] = graph.node(ref, name)

      # connects package -> dep
      graph.edge(values["name"], ref)

  return graph
...@@ -20,7 +20,10 @@ from ..build import ( ...@@ -20,7 +20,10 @@ from ..build import (
get_docserver_setup, get_docserver_setup,
get_env_directory, get_env_directory,
get_output_path, get_output_path,
remove_conda_loggers,
) )
remove_conda_loggers()
from ..constants import ( from ..constants import (
CONDA_BUILD_CONFIG, CONDA_BUILD_CONFIG,
CONDA_RECIPE_APPEND, CONDA_RECIPE_APPEND,
...@@ -182,7 +185,7 @@ def build( ...@@ -182,7 +185,7 @@ def build(
from bob.devtools.bootstrap import do_hack from bob.devtools.bootstrap import do_hack
project_dir = os.path.dirname(recipe_dir[0]) project_dir = os.path.dirname(recipe_dir[0])
do_hack(project_dir) do_hack(project_dir)
# get potential channel upload and other auxiliary channels # get potential channel upload and other auxiliary channels
channels = get_channels( channels = get_channels(
...@@ -235,7 +238,7 @@ def build( ...@@ -235,7 +238,7 @@ def build(
for d in recipe_dir: for d in recipe_dir:
if not os.path.exists(d): if not os.path.exists(d):
raise RuntimeError("The directory %s does not exist" % recipe_dir) raise RuntimeError("The directory %s does not exist" % d)
version_candidate = os.path.join(d, "..", "version.txt") version_candidate = os.path.join(d, "..", "version.txt")
if os.path.exists(version_candidate): if os.path.exists(version_candidate):
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys
import yaml
import click import click
from click_plugins import with_plugins from click_plugins import with_plugins
from ..graph import compute_dependency_graph, get_graphviz_dependency_graph from . import bdt
from ..constants import (
CONDA_BUILD_CONFIG,
CONDA_RECIPE_APPEND,
SERVER,
MATPLOTLIB_RCDIR,
BASE_CONDARC,
)
from ..build import make_conda_config
from ..bootstrap import set_environment, get_channels
from ..release import get_gitlab_instance
from ..graph import compute_adjencence_matrix, generate_graph
from ..log import verbosity_option, get_logger, echo_info from ..log import verbosity_option, get_logger, echo_info
logger = get_logger(__name__) logger = get_logger(__name__)
@click.command(
    epilog="""
Examples:

  1. Draws the graph of a package

     $ bdt gitlab graph bob/bob.bio.face

"""
)
@click.argument("package", required=True)
@click.option(
    "-p",
    "--python",
    default=("%d.%d" % sys.version_info[:2]),
    show_default=True,
    help="Version of python to build the environment for",
)
@click.option(
    "-r",
    "--condarc",
    help="Use custom conda configuration file instead of our own",
)
@click.option(
    "-m",
    "--config",
    "--variant-config-files",
    show_default=True,
    default=CONDA_BUILD_CONFIG,
    help="overwrites the path leading to " "variant configuration file to use",
)
@click.option(
    "-a",
    "--append-file",
    show_default=True,
    default=CONDA_RECIPE_APPEND,
    help="overwrites the path leading to " "appended configuration file to use",
)
@click.option(
    "-S",
    "--server",
    show_default=True,
    default=SERVER,
    help="Server used for downloading conda packages and documentation "
    "indexes of required packages",
)
@click.option(
    "-P",
    "--private/--no-private",
    default=False,
    help="Set this to **include** private channels on your search - "
    "you **must** be at Idiap to execute this build in this case - "
    "you **must** also use the correct server name through --server - "
    "notice this option has no effect to conda if you also pass --condarc",
)
@click.option(
    "-X",
    "--stable/--no-stable",
    default=False,
    help="Set this to **exclude** beta channels from your build - "
    "notice this option has no effect if you also pass --condarc",
)
@click.option(
    "-C",
    "--ci/--no-ci",
    default=False,
    hidden=True,
    help="Use this flag to indicate the graph will be running on the CI",
)
@click.option(
    "-n",
    "--name",
    show_default=True,
    default="graph",
    help="set the graph name",
)
@click.option(
    "-f",
    "--format",
    show_default=True,
    default="svg",
    help="determines the type of output to expect",
)
@click.option(
    "-w",
    "--whitelist",
    show_default=True,
    # raw string: ``\.`` in a plain string literal is an invalid escape
    # sequence (DeprecationWarning, later SyntaxWarning)
    default=r"^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
    help="package regular expression to preserve in the graph, "
    "use .* for keeping all packages, including non-maintained ones.  The "
    "current expression accepts most of our packages, excluding bob/beat-devel")
@verbosity_option()
@bdt.raise_on_error
def graph(package, python, condarc, config, append_file, server, private,
    stable, ci, name, format, whitelist):
  """
  Computes the dependency graph of a gitlab package (via its conda recipe)
  and outputs an dot file that can be used by graphviz to draw a direct
  acyclic graph (DAG) of package dependencies.

  This command uses the conda-build API to resolve the package dependencies.
  """

  if "/" not in package:
    raise RuntimeError('PACKAGE should be specified as "group/name"')

  package_group, package_name = package.split('/', 1)

  gl = get_gitlab_instance()

  # get potential channel upload and other auxiliary channels
  channels = get_channels(
      public=(not private),
      stable=stable,
      server=server,
      intranet=ci,
      group=package_group,
  )

  if condarc is not None:
    logger.info("Loading CONDARC file from %s...", condarc)
    with open(condarc, "rb") as f:
      condarc_options = yaml.load(f, Loader=yaml.FullLoader)
  else:
    # use default and add channels
    all_channels = []
    all_channels += channels + ["defaults"]
    condarc_options = yaml.load(BASE_CONDARC, Loader=yaml.FullLoader)
    logger.info(
        "Using the following channels during build:\n  - %s",
        "\n  - ".join(all_channels),
    )
    condarc_options["channels"] = all_channels

  conda_config = make_conda_config(
      config, python, append_file, condarc_options
  )

  set_environment("MATPLOTLIBRC", MATPLOTLIB_RCDIR)

  # setup BOB_DOCUMENTATION_SERVER environment variable (used for bob.extension
  # and derived documentation building via Sphinx)
  set_environment("DOCSERVER", server)
  set_environment("BOB_DOCUMENTATION_SERVER", "/not/set")

  adj_matrix = compute_adjencence_matrix(gl, package, conda_config,
      channels[0])

  graph = generate_graph(adj_matrix, deptypes=[], whitelist=whitelist)
  graph.render(name, format=format, cleanup=True)
...@@ -15,7 +15,10 @@ from ..build import ( ...@@ -15,7 +15,10 @@ from ..build import (
make_conda_config, make_conda_config,
get_docserver_setup, get_docserver_setup,
get_env_directory, get_env_directory,
remove_conda_loggers,
) )
remove_conda_loggers()
from ..constants import ( from ..constants import (
CONDA_BUILD_CONFIG, CONDA_BUILD_CONFIG,
CONDA_RECIPE_APPEND, CONDA_RECIPE_APPEND,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment