Commit eaf89934 authored by André Anjos

[graph] Improve logging; Limit recursing to dependence types chosen from command-line

parent 7a3ba42d
Merge request !126: Implemented a mechanism to run a dependency graph
Pipeline #35403 passed
@@ -16,13 +16,21 @@ from .build import (
     get_parsed_recipe,
     get_output_path,
 )
-from .log import get_logger
+from .log import get_logger, echo_info
 
 logger = get_logger(__name__)
 
-def compute_adjencence_matrix(gl, package, conda_config, main_channel,
-    recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", current={},
-    ref="master"):
+def compute_adjencence_matrix(
+    gl,
+    package,
+    conda_config,
+    main_channel,
+    recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
+    current={},
+    ref="master",
+    deptypes=[],
+):
     """
     Given a target package, returns an adjacence matrix with its dependencies
     returned via the conda-build API
@@ -62,6 +70,11 @@ def compute_adjencence_matrix(gl, package, conda_config, main_channel,
     ref : str
       Name of the git reference (branch, tag or commit hash) to use
 
+    deptypes : list
+      A list of dependence types to preserve when building the graph. If
+      empty, then preserve all. You may set values "build", "host",
+      "run" and "test", in any combination
+
     Returns
     -------
@@ -75,24 +88,31 @@ def compute_adjencence_matrix(gl, package, conda_config, main_channel,
     """
 
     use_package = gl.projects.get(package)
+    deptypes = deptypes if deptypes else ["host", "build", "run", "test"]
 
     if use_package.attributes["path_with_namespace"] in current:
         return current
 
-    logger.info('Resolving graph for %s@%s',
-        use_package.attributes["path_with_namespace"], ref)
+    echo_info(
+        "Resolving graph for %s@%s"
+        % (use_package.attributes["path_with_namespace"], ref)
+    )
 
     with tempfile.TemporaryDirectory() as tmpdir:
 
-        logger.debug('Downloading archive for %s...', ref)
-        archive = use_package.repository_archive(ref=ref) #in memory
+        logger.debug("Downloading archive for %s...", ref)
+        archive = use_package.repository_archive(ref=ref)  # in memory
         logger.debug("Archive has %d bytes", len(archive))
         with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f:
             f.extractall(path=tmpdir)
 
         # use conda-build API to figure out all dependencies
-        recipe_dir = glob.glob(os.path.join(tmpdir, '*', 'conda'))[0]
-        logger.debug('Resolving conda recipe for package at %s...', recipe_dir)
+        recipe_dir = glob.glob(os.path.join(tmpdir, "*", "conda"))[0]
+        logger.debug("Resolving conda recipe for package at %s...", recipe_dir)
         if not os.path.exists(recipe_dir):
-            raise RuntimeError("The conda recipe directory %s does not " \
-                "exist" % recipe_dir)
+            raise RuntimeError(
+                "The conda recipe directory %s does not " "exist" % recipe_dir
+            )
 
         version_candidate = os.path.join(recipe_dir, "..", "version.txt")
         if os.path.exists(version_candidate):
@@ -105,65 +125,94 @@ def compute_adjencence_matrix(gl, package, conda_config, main_channel,
         path = get_output_path(metadata, conda_config)
 
         # gets the next build number
-        build_number, _ = next_build_number(main_channel,
-            os.path.basename(path))
+        build_number, _ = next_build_number(
+            main_channel, os.path.basename(path)
+        )
 
         # at this point, all elements are parsed, I know the package version,
         # build number and all dependencies
+        # exclude stuff we are not interested in
 
         # host and build should have precise numbers to be used for building
         # this package.
-        host = rendered_recipe['requirements'].get('host', [])
-        build = rendered_recipe['requirements'].get('build', [])
+        if "host" not in deptypes:
+            host = []
+        else:
+            host = rendered_recipe["requirements"].get("host", [])
+
+        if "build" not in deptypes:
+            build = []
+        else:
+            build = rendered_recipe["requirements"].get("build", [])
 
         # run dependencies are more vague
-        run = rendered_recipe['requirements'].get('run', [])
+        if "run" not in deptypes:
+            run = []
+        else:
+            run = rendered_recipe["requirements"].get("run", [])
 
         # test dependencies even more vague
-        test = rendered_recipe.get('test', {}).get('requires', [])
+        if "test" not in deptypes:
+            test = []
+        else:
+            test = rendered_recipe.get("test", {}).get("requires", [])
 
         # for each of the above sections, recurse in figuring out dependencies,
         # if dependencies match a target set of globs
         recurse_compiled = re.compile(recurse_regexp)
 
         def _re_filter(l):
             return [k for k in l if recurse_compiled.match(k)]
 
-        host_recurse = set([z.split()[0] for z in _re_filter(host)])
-        build_recurse = set([z.split()[0] for z in _re_filter(build)])
-        run_recurse = set([z.split()[0] for z in _re_filter(run)])
-        test_recurse = set([z.split()[0] for z in _re_filter(test)])
-
-        # we do not separate host/build/run/test dependencies and assume they
-        # will all be of the same version in the end.  Otherwise, we would need
-        # to do this in a bit more careful way.
-        all_recurse = host_recurse | build_recurse | run_recurse | test_recurse
+        all_recurse = set()
+        all_recurse |= set([z.split()[0] for z in _re_filter(host)])
+        all_recurse |= set([z.split()[0] for z in _re_filter(build)])
+        all_recurse |= set([z.split()[0] for z in _re_filter(run)])
+        all_recurse |= set([z.split()[0] for z in _re_filter(test)])
 
         # complete the package group, which is not provided by conda-build
         def _add_default_group(p):
-            if p.startswith('bob') or p.startswith('gridtk'):
-                return '/'.join(('bob', p))
-            elif p.startswith('beat'):
-                return '/'.join(('beat', p))
-            elif p.startswith('batl'):
-                return '/'.join(('batl', p))
+            if p.startswith("bob") or p.startswith("gridtk"):
+                return "/".join(("bob", p))
+            elif p.startswith("beat"):
+                return "/".join(("beat", p))
+            elif p.startswith("batl"):
+                return "/".join(("batl", p))
             else:
-                raise RuntimeError('Do not know how to recurse to package %s' \
-                    % (p,))
+                logger.warning("Do not know how to recurse to package %s "
+                    "(to which group does it belong?) - skipping...", p)
+                return None
 
         all_recurse = set([_add_default_group(k) for k in all_recurse])
+        if None in all_recurse:
+            all_recurse.remove(None)
 
         # do not recurse for packages we already know
         all_recurse -= set(current.keys())
-        logger.debug("Recursing over the following packages: %s",
-            ", ".join(all_recurse))
+        logger.info("Recursing over the following packages: %s",
+            ", ".join(all_recurse))
 
         for dep in all_recurse:
-            dep_adjmtx = compute_adjencence_matrix(gl, dep, conda_config,
-                main_channel, recurse_regexp=recurse_regexp, ref=ref)
+            dep_adjmtx = compute_adjencence_matrix(
+                gl,
+                dep,
+                conda_config,
+                main_channel,
+                recurse_regexp=recurse_regexp,
+                ref=ref,
+                deptypes=deptypes,
+            )
             current.update(dep_adjmtx)
 
-        current[package] = dict(host=host, build=build, run=run, test=test,
-            version=rendered_recipe["package"]["version"],
-            name=rendered_recipe["package"]["name"],
-            build_string=os.path.basename(path).split('-')[-1].split('.')[0])
+        current[package] = dict(
+            host=host,
+            build=build,
+            run=run,
+            test=test,
+            version=rendered_recipe["package"]["version"],
+            name=rendered_recipe["package"]["name"],
+            build_string=os.path.basename(path).split("-")[-1].split(".")[0],
+        )
 
     return current
@@ -208,13 +257,21 @@ def generate_graph(adjacence_matrix, deptypes, whitelist):
     # generate nodes for all packages we want to track explicitly
     for package, values in adjacence_matrix.items():
         if not whitelist_compiled.match(values["name"]):
-            logger.debug("Skipping main package %s (did not match whitelist)",
-                value["name"])
+            logger.debug(
+                "Skipping main package %s (did not match whitelist)",
+                values["name"],
+            )
             continue
-        name = values["name"] + "\n" + values["version"] + "\n" \
-            + values["build_string"]
-        nodes[values["name"]] = graph.node(values["name"], name, shape="box",
-            color="blue")
+        name = (
+            values["name"]
+            + "\n"
+            + values["version"]
+            + "\n"
+            + values["build_string"]
+        )
+        nodes[values["name"]] = graph.node(
+            values["name"], name, shape="box", color="blue"
+        )
 
     # generates nodes for all dependencies
     for package, values in adjacence_matrix.items():
@@ -231,17 +288,18 @@ def generate_graph(adjacence_matrix, deptypes, whitelist):
         for ref, parts in deps.items():
             if not whitelist_compiled.match(ref):
-                logger.debug("Skipping dependence %s (did not match whitelist)",
-                    ref)
+                logger.debug(
+                    "Skipping dependence %s (did not match whitelist)", ref
+                )
                 continue
 
             if not any([k == ref for k in nodes.keys()]):
                 # we do not have a node for that dependence, create it
-                name = str(ref) #new string
+                name = str(ref)  # new string
                 if len(parts) >= 1:
-                    name += "\n" + parts[0] #dep version
+                    name += "\n" + parts[0]  # dep version
                 if len(parts) >= 2:
-                    name += "\n" + parts[1] #dep build
+                    name += "\n" + parts[1]  # dep build
                 nodes[ref] = graph.node(ref, name)
 
             # connects package -> dep
...
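For reference, a minimal sketch of calling the changed function directly with the new deptypes argument; the GitLab handle, conda-build configuration, package path and channel name below are placeholder assumptions, not values taken from this commit:

    # illustrative only: "gl", "conda_config", the package path and the
    # channel name are assumed placeholders normally prepared by the CLI
    adj_matrix = compute_adjencence_matrix(
        gl,                        # authenticated python-gitlab instance
        "bob/bob.devtools",        # example package to start the graph from
        conda_config,              # conda-build configuration object
        "public",                  # placeholder main channel name
        deptypes=["host", "run"],  # keep only host and run dependencies
    )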
@@ -30,7 +30,9 @@ Examples:
  1. Draws the graph of a package
 
-    $ bdt gitlab graph bob/bob.bio.face
+    $ bdt gitlab graph -vv bob/bob.learn.linear
+
+ 2.
 """
@@ -115,11 +117,22 @@ Examples:
     default="^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
     help="package regular expression to preserve in the graph, "
     "use .* for keeping all packages, including non-maintained ones. The "
-    "current expression accepts most of our packages, excluding bob/beat-devel")
+    "current expression accepts most of our packages, excluding "
+    "bob/beat-devel. This flag only affects the graph generation - we still "
+    "recurse over all packages to calculate dependencies.")
+@click.option(
+    "-d",
+    "--deptypes",
+    show_default=True,
+    default=[],
+    multiple=True,
+    help="types of dependencies to consider. Pass multiple times to include "
+    "more types. Valid types are 'host', 'build', 'run' and 'test'. An "
+    "empty set considers all dependencies to the graph")
 @verbosity_option()
 @bdt.raise_on_error
 def graph(package, python, condarc, config, append_file, server, private,
-          stable, ci, name, format, whitelist):
+          stable, ci, name, format, whitelist, deptypes):
     """
     Computes the dependency graph of a gitlab package (via its conda recipe)
     and outputs an dot file that can be used by graphviz to draw a direct
@@ -171,7 +184,8 @@ def graph(package, python, condarc, config, append_file, server, private,
     set_environment("BOB_DOCUMENTATION_SERVER", "/not/set")
 
     adj_matrix = compute_adjencence_matrix(gl, package, conda_config,
-        channels[0])
+        channels[0], deptypes=deptypes)
 
-    graph = generate_graph(adj_matrix, deptypes=[], whitelist=whitelist)
+    graph = generate_graph(adj_matrix, deptypes=deptypes, whitelist=whitelist)
     graph.render(name, format=format, cleanup=True)
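As a usage illustration of the new option (following the command form already shown in the Examples section; the package name is only an example), a graph restricted to host and run dependencies could be requested with:

    $ bdt gitlab graph -vv -d host -d run bob/bob.learn.linear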