Skip to content
Snippets Groups Projects
Commit 5e281f97 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[graph] Re-implement dependence graph generation as per my instructions to @tiago.pereira

parent 018e572d
No related branches found
No related tags found
1 merge request!126Implemented a mechanism to run a dependency graph
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import conda.cli.python_api
import json
import os
import re
import glob
import fnmatch
import tempfile
import tarfile
from io import BytesIO
from .bootstrap import set_environment
from .build import (
next_build_number,
get_rendered_metadata,
get_parsed_recipe,
get_output_path,
)
from .log import get_logger
logger = get_logger(__name__)
from .log import verbosity_option, get_logger, echo_info
logger = get_logger(__name__)
def compute_adjencence_matrix(gl, package, conda_config, main_channel,
recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", current={},
ref="master"):
"""
Given a target package, returns an adjacence matrix with its dependencies
returned via the conda-build API
Parameters
----------
from graphviz import Digraph
gl : object
Pre-instantiated instance of the gitlab server API to use, of type
:py:class:`gitlab.Gitlab`.
package : str
Name of the package, including its group in the format
``group/package``
def get_graphviz_dependency_graph(
graph_dict,
file_name,
prefix="bob.",
black_list=["python", "setuptools", "libcxx", "numpy", "libblitz", "boost"],
):
"""
Given a dictionary with the dependency graph, compute the graphviz DAG and save it
in SVG
"""
conda_config : dict
Dictionary of conda configuration options loaded from command-line and
read from defaults available.
d = Digraph(format="svg", engine="dot")
main_channel : str
Main channel to consider when looking for the next build number of
the target package
for i in graph_dict:
for j in graph_dict[i]:
# Conections to python, setuptools....gets very messy
if j in black_list:
continue
recurse_regexp : str
Regular expression to use, for determining where to recurse for
resolving dependencies. Typically, this should be set to a list of
packages which exists in gitlab. If it includes more than that, then
we may not be able to reach the package repository and an error will be
raised. The default expression avoids recursing over bob/beat-devel
packages.
current : dict
Current list of packages already inspected - useful for recurrent calls
to this function, to avoid potential cyclic dependencies. Corresponds
to the current return value of this function.
ref : str
Name of the git reference (branch, tag or commit hash) to use
if prefix in j:
d.attr("node", shape="box")
else:
d.attr("node", shape="ellipse")
d.edge(i, j)
d.render(file_name)
Returns
-------
adjacence_matrix : dict
A dictionary that contains the dependencies of all packages considered
in the recursion. The keys are the name of the packages, the values,
correspond to the dependencies (host, build, run and test) as a list of
strings.
def compute_dependency_graph(
package_name, channel=None, selected_packages=[], prefix="bob.", dependencies=dict()
):
"""
Given a target package, returns an adjacency matrix with its dependencies returned via the command `conda search xxxx --info`
**Parameters**
package_name:
Name of the package
channel:
Name of the channel to be sent via `-c` option. If None `conda search` will use what is in .condarc
use_package = gl.projects.get(package)
logger.info('Resolving graph for %s@%s',
use_package.attributes["path_with_namespace"], ref)
with tempfile.TemporaryDirectory() as tmpdir:
logger.debug('Downloading archive for %s...', ref)
archive = use_package.repository_archive(ref=ref) #in memory
logger.debug("Archive has %d bytes", len(archive))
with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f:
f.extractall(path=tmpdir)
# use conda-build API to figure out all dependencies
recipe_dir = glob.glob(os.path.join(tmpdir, '*', 'conda'))[0]
logger.debug('Resolving conda recipe for package at %s...', recipe_dir)
if not os.path.exists(recipe_dir):
raise RuntimeError("The conda recipe directory %s does not " \
"exist" % recipe_dir)
version_candidate = os.path.join(recipe_dir, "..", "version.txt")
if os.path.exists(version_candidate):
version = open(version_candidate).read().rstrip()
set_environment("BOB_PACKAGE_VERSION", version)
# pre-renders the recipe - figures out the destination
metadata = get_rendered_metadata(recipe_dir, conda_config)
rendered_recipe = get_parsed_recipe(metadata)
path = get_output_path(metadata, conda_config)
# gets the next build number
build_number, _ = next_build_number(main_channel,
os.path.basename(path))
# at this point, all elements are parsed, I know the package version,
# build number and all dependencies
# host and build should have precise numbers to be used for building
# this package.
host = rendered_recipe['requirements'].get('host', [])
build = rendered_recipe['requirements'].get('build', [])
# run dependencies are more vague
run = rendered_recipe['requirements'].get('run', [])
# test dependencies even more vague
test = rendered_recipe.get('test', {}).get('requires', [])
# for each of the above sections, recurse in figuring out dependencies,
# if dependencies match a target set of globs
recurse_compiled = re.compile(recurse_regexp)
def _re_filter(l):
    """Keeps only the entries of ``l`` accepted by ``recurse_compiled``

    ``recurse_compiled`` is the compiled recursion regular expression taken
    from the enclosing scope.  Matching is anchored at the start of each
    entry (``match`` semantics), and the input order is preserved.
    """
    return list(filter(recurse_compiled.match, l))
host_recurse = set([z.split()[0] for z in _re_filter(host)])
build_recurse = set([z.split()[0] for z in _re_filter(build)])
run_recurse = set([z.split()[0] for z in _re_filter(run)])
test_recurse = set([z.split()[0] for z in _re_filter(test)])
# we do not separate host/build/run/test dependencies and assume they
# will all be of the same version in the end. Otherwise, we would need
# to do this in a bit more careful way.
all_recurse = host_recurse | build_recurse | run_recurse | test_recurse
# complete the package group, which is not provided by conda-build
def _add_default_group(p):
if p.startswith('bob') or p.startswith('gridtk'):
return '/'.join(('bob', p))
elif p.startswith('beat'):
return '/'.join(('beat', p))
elif p.startswith('batl'):
return '/'.join(('batl', p))
else:
raise RuntimeError('Do not know how to recurse to package %s' \
% (p,))
all_recurse = set([_add_default_group(k) for k in all_recurse])
selected_packages:
List of target packages. If set, the returned adjacency matrix will be in terms of this list.
# do not recurse for packages we already know
all_recurse -= set(current.keys())
logger.debug("Recursing over the following packages: %s",
", ".join(all_recurse))
prefix:
Only seach for deep dependencies under the prefix. This would avoid to go deeper in
dependencies not maintained by us, such as, numpy, matplotlib, etc..
for dep in all_recurse:
dep_adjmtx = compute_adjencence_matrix(gl, dep, conda_config,
main_channel, recurse_regexp=recurse_regexp, ref=ref)
current.update(dep_adjmtx)
dependencies:
Dictionary controlling the state of each search
current[package] = dict(host=host, build=build, run=run, test=test,
version=rendered_recipe["package"]["version"],
name=rendered_recipe["package"]["name"],
build_string=os.path.basename(path).split('-')[-1].split('.')[0])
return current
def generate_graph(adjacence_matrix, deptypes, whitelist):
"""
Computes a graphviz/dot representation of the build graph
if package_name in dependencies:
return dependencies
Parameters
----------
dependencies[package_name] = fetch_dependencies(
package_name, channel, selected_packages
)
logger.info(f" >> Searching dependencies of {package_name}")
for d in dependencies[package_name]:
if prefix in d:
compute_dependency_graph(
d, channel, selected_packages, prefix, dependencies
)
return dependencies
adjacence_matrix : dict
A dictionary containing the adjacence matrix, that states the
dependencies for each package in the build, to other packages
deptypes : list
A list of dependence types to preserve when building the graph. If
empty, then preserve all. You may set values "build", "host",
"run" and "test", in any combination
def fetch_dependencies(package_name, channel=None, selected_packages=[]):
"""
conda search the dependencies of a package
whitelist : str
Regular expression for matching strings to preserve while building
the graph
Returns
-------
graph : graphviz.Digraph
The generated graph
**Parameters**
packge_name:
channel:
selected_packages:
"""
# Running conda search and returns to a json file
if channel is None:
package_description = conda.cli.python_api.run_command(
conda.cli.python_api.Commands.SEARCH, package_name, "--info", "--json"
)
else:
package_description = conda.cli.python_api.run_command(
conda.cli.python_api.Commands.SEARCH,
package_name,
"--info",
"-c",
channel,
"--json",
)
# TODO: Fix that
package_description = json.loads(package_description[0])
# Fetching the dependencies of the most updated package
all_dependencies = [
p.split(" ")[0] for p in package_description[package_name][-1]["depends"]
]
if len(selected_packages) > 0:
# Filtering the dependencies
return [d for d in selected_packages if d in all_dependencies]
return all_dependencies
from graphviz import Digraph
whitelist_compiled = re.compile(whitelist)
deptypes = deptypes if deptypes else ["host", "build", "run", "test"]
graph = Digraph()
nodes = {}
# generate nodes for all packages we want to track explicitly
for package, values in adjacence_matrix.items():
if not whitelist_compiled.match(values["name"]):
logger.debug("Skipping main package %s (did not match whitelist)",
value["name"])
continue
name = values["name"] + "\n" + values["version"] + "\n" \
+ values["build_string"]
nodes[values["name"]] = graph.node(values["name"], name, shape="box",
color="blue")
# generates nodes for all dependencies
for package, values in adjacence_matrix.items():
# ensures we only have the most complete dependence in the our list
deps = {}
to_consider = set()
for k in deptypes:
to_consider |= set(values[k])
for dep in to_consider:
name = dep.split()[0]
if name not in deps or (name in deps and not deps[name]):
deps[name] = dep.split()[1:]
for ref, parts in deps.items():
if not whitelist_compiled.match(ref):
logger.debug("Skipping dependence %s (did not match whitelist)",
ref)
continue
if not any([k == ref for k in nodes.keys()]):
# we do not have a node for that dependence, create it
name = str(ref) #new string
if len(parts) >= 1:
name += "\n" + parts[0] #dep version
if len(parts) >= 2:
name += "\n" + parts[1] #dep build
nodes[ref] = graph.node(ref, name)
# connects package -> dep
graph.edge(values["name"], ref)
return graph
......@@ -20,7 +20,10 @@ from ..build import (
get_docserver_setup,
get_env_directory,
get_output_path,
remove_conda_loggers,
)
remove_conda_loggers()
from ..constants import (
CONDA_BUILD_CONFIG,
CONDA_RECIPE_APPEND,
......@@ -182,7 +185,7 @@ def build(
from bob.devtools.bootstrap import do_hack
project_dir = os.path.dirname(recipe_dir[0])
do_hack(project_dir)
# get potential channel upload and other auxiliary channels
channels = get_channels(
......@@ -235,7 +238,7 @@ def build(
for d in recipe_dir:
if not os.path.exists(d):
raise RuntimeError("The directory %s does not exist" % recipe_dir)
raise RuntimeError("The directory %s does not exist" % d)
version_candidate = os.path.join(d, "..", "version.txt")
if os.path.exists(version_candidate):
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import yaml
import click
from click_plugins import with_plugins
from ..graph import compute_dependency_graph, get_graphviz_dependency_graph
from . import bdt
from ..constants import (
CONDA_BUILD_CONFIG,
CONDA_RECIPE_APPEND,
SERVER,
MATPLOTLIB_RCDIR,
BASE_CONDARC,
)
from ..build import make_conda_config
from ..bootstrap import set_environment, get_channels
from ..release import get_gitlab_instance
from ..graph import compute_adjencence_matrix, generate_graph
from ..log import verbosity_option, get_logger, echo_info
logger = get_logger(__name__)
@click.command(
epilog="""
Example:
Examples:
bdt graph bob.bio.face graph
1. Draws the graph of a package
$ bdt gitlab graph bob/bob.bio.face
"""
)
@click.argument("package_name", required=True)
@click.argument("output_file", required=True)
@click.argument("package", required=True)
@click.option(
"-c",
"--channel",
default=None,
help="Define a target channel for conda serch. If not set, will use what is set in .condarc",
"-p",
"--python",
default=("%d.%d" % sys.version_info[:2]),
show_default=True,
help="Version of python to build the environment for",
)
@click.option(
"-p",
"--prefix",
default="bob.",
help="It will recursivelly look into dependencies whose package name matches the prefix. Default 'bob.'",
"-r",
"--condarc",
help="Use custom conda configuration file instead of our own",
)
@click.option(
"-m",
"--config",
"--variant-config-files",
show_default=True,
default=CONDA_BUILD_CONFIG,
help="overwrites the path leading to " "variant configuration file to use",
)
@click.option(
"-a",
"--append-file",
show_default=True,
default=CONDA_RECIPE_APPEND,
help="overwrites the path leading to " "appended configuration file to use",
)
@click.option(
"-S",
"--server",
show_default=True,
default=SERVER,
help="Server used for downloading conda packages and documentation "
"indexes of required packages",
)
@click.option(
"-P",
"--private/--no-private",
default=False,
help="Set this to **include** private channels on your search - "
"you **must** be at Idiap to execute this build in this case - "
"you **must** also use the correct server name through --server - "
"notice this option has no effect to conda if you also pass --condarc",
)
@click.option(
"-X",
"--stable/--no-stable",
default=False,
help="Set this to **exclude** beta channels from your build - "
"notice this option has no effect if you also pass --condarc",
)
@click.option(
"-C",
"--ci/--no-ci",
default=False,
hidden=True,
help="Use this flag to indicate the graph will be running on the CI",
)
@click.option(
"-n",
"--name",
show_default=True,
default="graph",
help="set the graph name",
)
@click.option(
"-f",
"--format",
show_default=True,
default="svg",
help="determines the type of output to expect",
)
@click.option(
"-w",
"--whitelist",
show_default=True,
default="^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
help="package regular expression to preserve in the graph, "
"use .* for keeping all packages, including non-maintained ones. The "
"current expression accepts most of our packages, excluding bob/beat-devel")
@verbosity_option()
def graph(package_name, output_file, channel, prefix):
@bdt.raise_on_error
def graph(package, python, condarc, config, append_file, server, private,
stable, ci, name, format, whitelist):
"""
Compute the dependency graph of a conda package and save it in an SVG file using graphviz.
Computes the dependency graph of a gitlab package (via its conda recipe)
and outputs an dot file that can be used by graphviz to draw a direct
acyclic graph (DAG) of package dependencies.
This command uses the conda-build API to resolve the package dependencies.
"""
logger.info(f"Computing dependency graph")
graph_dict = compute_dependency_graph(package_name, channel=channel, prefix=prefix)
logger.info("Generating SVG")
get_graphviz_dependency_graph(graph_dict, output_file, prefix=prefix)
if "/" not in package:
raise RuntimeError('PACKAGE should be specified as "group/name"')
package_group, package_name = package.split('/', 1)
gl = get_gitlab_instance()
# get potential channel upload and other auxiliary channels
channels = get_channels(
public=(not private),
stable=stable,
server=server,
intranet=ci,
group=package_group,
)
if condarc is not None:
logger.info("Loading CONDARC file from %s...", condarc)
with open(condarc, "rb") as f:
condarc_options = yaml.load(f, Loader=yaml.FullLoader)
else:
# use default and add channels
all_channels = []
all_channels += channels + ["defaults"]
condarc_options = yaml.load(BASE_CONDARC, Loader=yaml.FullLoader)
logger.info(
"Using the following channels during build:\n - %s",
"\n - ".join(all_channels),
)
condarc_options["channels"] = all_channels
conda_config = make_conda_config(
config, python, append_file, condarc_options
)
set_environment("MATPLOTLIBRC", MATPLOTLIB_RCDIR)
# setup BOB_DOCUMENTATION_SERVER environment variable (used for bob.extension
# and derived documentation building via Sphinx)
set_environment("DOCSERVER", server)
set_environment("BOB_DOCUMENTATION_SERVER", "/not/set")
adj_matrix = compute_adjencence_matrix(gl, package, conda_config,
channels[0])
graph = generate_graph(adj_matrix, deptypes=[], whitelist=whitelist)
graph.render(name, format=format, cleanup=True)
......@@ -15,7 +15,10 @@ from ..build import (
make_conda_config,
get_docserver_setup,
get_env_directory,
remove_conda_loggers,
)
remove_conda_loggers()
from ..constants import (
CONDA_BUILD_CONFIG,
CONDA_RECIPE_APPEND,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment