#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Utilities for calculating package dependencies and drawing graphs"""

import os
import re
import glob
import fnmatch
import tempfile
import tarfile
from io import BytesIO

from .bootstrap import set_environment
from .build import (
    next_build_number,
    get_rendered_metadata,
    get_parsed_recipe,
    get_output_path,
)
from .log import get_logger, echo_info

logger = get_logger(__name__)


def compute_adjencence_matrix(
    gl,
    package,
    conda_config,
    main_channel,
    recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
    current={},
    ref="master",
    deptypes=[],
):
    """
    Given a target package, returns an adjacency matrix with its dependencies,
    as resolved via the conda-build API

    Parameters
    ----------

    gl : object
        Pre-instantiated instance of the gitlab server API to use, of type
        :py:class:`gitlab.Gitlab`.

    package : str
        Name of the package, including its group in the format
        ``group/package``

    conda_config : dict
        Dictionary of conda configuration options loaded from command-line and
        read from defaults available.

    main_channel : str
        Main channel to consider when looking for the next build number of
        the target package

    recurse_regexp : str
        Regular expression used to decide which dependencies to recurse into
        when resolving the graph.  Typically, this should only match packages
        which exist in gitlab.  If it matches more than that, then we may not
        be able to reach the package repository and an error will be raised.
        The default expression avoids recursing over bob/beat-devel packages
        (e.g. it matches ``bob.extension`` and ``gridtk``, but rejects
        ``bob-devel`` and ``beat-devel``).

    current : dict
        Current dictionary of packages already inspected - useful for recursive
        calls to this function, to avoid potential cyclic dependencies.
        Corresponds to the current return value of this function.  If ``None``,
        starts from an empty dictionary.

    ref : str
        Name of the git reference (branch, tag or commit hash) to use

    deptypes : list
        A list of dependence types to preserve when building the graph.  If
        empty, then preserve all.  You may set values "build", "host",
        "run" and "test", in any combination


    Returns
    -------

    adjacence_matrix : dict
        A dictionary that contains the dependencies of all packages considered
        in the recursion.  The keys are the names of the packages; each value
        is a dictionary with the dependencies of each type (``host``, ``build``,
        ``run`` and ``test``) as lists of strings, together with the rendered
        package ``name``, ``version`` and ``build_string``.
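
    Example
    -------

    A minimal usage sketch; the gitlab server address, token, package name,
    channel and conda configuration below are illustrative assumptions only::

        import gitlab

        gl = gitlab.Gitlab("https://gitlab.example.com", private_token="<token>")
        deps = compute_adjencence_matrix(
            gl,
            "bob/bob.extension",
            conda_config={},  # normally built from your conda-build defaults
            main_channel="https://www.example.com/conda",
        )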

    """

    use_package = gl.projects.get(package)
    deptypes = deptypes if deptypes else ["host", "build", "run", "test"]
    current = current if current is not None else {}

    if use_package.attributes["path_with_namespace"] in current:
        return current

    echo_info(
        "Resolving graph for %s@%s"
        % (use_package.attributes["path_with_namespace"], ref)
    )
    with tempfile.TemporaryDirectory() as tmpdir:

        logger.debug("Downloading archive for %s...", ref)
        archive = use_package.repository_archive(ref=ref)  # in memory
        logger.debug("Archive has %d bytes", len(archive))

        with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f:
            f.extractall(path=tmpdir)

        # use conda-build API to figure out all dependencies
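        # gitlab repository archives extract into a single top-level directory
        # (named after the project and reference), hence the "*" in this glob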
        recipe_dir = glob.glob(os.path.join(tmpdir, "*", "conda"))[0]
        logger.debug("Resolving conda recipe for package at %s...", recipe_dir)
        if not os.path.exists(recipe_dir):
            raise RuntimeError(
                "The conda recipe directory %s does not exist" % recipe_dir
            )

        version_candidate = os.path.join(recipe_dir, "..", "version.txt")
        if os.path.exists(version_candidate):
            with open(version_candidate) as f:
                version = f.read().rstrip()
            set_environment("BOB_PACKAGE_VERSION", version)

        # pre-renders the recipe - figures out the destination
        metadata = get_rendered_metadata(recipe_dir, conda_config)
        rendered_recipe = get_parsed_recipe(metadata)
        path = get_output_path(metadata, conda_config)[0]

        # gets the next build number
        build_number, _ = next_build_number(
            main_channel, os.path.basename(path)
        )

        # at this point, all elements are parsed; we know the package version,
        # build number and all dependencies

        # exclude dependency types we are not interested in

        # host and build should have precise numbers to be used for building
        # this package.
        if "host" not in deptypes:
            host = []
        else:
            host = rendered_recipe["requirements"].get("host", [])

        if "build" not in deptypes:
            build = []
        else:
            build = rendered_recipe["requirements"].get("build", [])

        # run dependencies are more vague
        if "run" not in deptypes:
            run = []
        else:
            run = rendered_recipe["requirements"].get("run", [])

        # test dependencies are even more vague
        if "test" not in deptypes:
            test = []
        else:
            test = rendered_recipe.get("test", {}).get("requires", [])

        # for each of the above sections, recurse into dependencies whose
        # names match the recursion regular expression
        recurse_compiled = re.compile(recurse_regexp)

        def _re_filter(deplist):
            return [k for k in deplist if recurse_compiled.match(k)]

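        # only the package name (first whitespace-separated token) matters for
        # recursion; version pins such as ">=4.0" are dropped at this point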
        all_recurse = set()
        all_recurse |= set([z.split()[0] for z in _re_filter(host)])
        all_recurse |= set([z.split()[0] for z in _re_filter(build)])
        all_recurse |= set([z.split()[0] for z in _re_filter(run)])
        all_recurse |= set([z.split()[0] for z in _re_filter(test)])

        # complete the package group, which is not provided by conda-build
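        # (e.g. "bob.extension" -> "bob/bob.extension", "gridtk" -> "bob/gridtk")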
        def _add_default_group(p):
            if p.startswith("bob") or p.startswith("gridtk"):
                return "/".join(("bob", p))
            elif p.startswith("beat"):
                return "/".join(("beat", p))
            elif p.startswith("batl"):
                return "/".join(("batl", p))
            else:
                logger.warning(
                    "Do not know how to recurse to package %s "
                    "(to which group does it belong?) - skipping...",
                    p,
                )
                return None

        all_recurse = set([_add_default_group(k) for k in all_recurse])
        if None in all_recurse:
            all_recurse.remove(None)

        # do not recurse for packages we already know
        all_recurse -= set(current.keys())
        logger.info(
            "Recursing over the following packages: %s", ", ".join(all_recurse)
        )

        for dep in all_recurse:
            dep_adjmtx = compute_adjencence_matrix(
                gl,
                dep,
                conda_config,
                main_channel,
                recurse_regexp=recurse_regexp,
                current=current,
                ref=ref,
                deptypes=deptypes,
            )
            current.update(dep_adjmtx)

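        # each entry in the returned adjacence matrix has, roughly, this shape
        # (all values below are illustrative only):
        #
        #   "bob/bob.extension": {
        #       "host": ["python >=3.6", "setuptools"],
        #       "build": ["cmake"],
        #       "run": ["python", "setuptools"],
        #       "test": ["nose"],
        #       "version": "4.0.0",
        #       "name": "bob.extension",
        #       "build_string": "py38_0",  # from e.g. "...-4.0.0-py38_0.tar.bz2"
        #   }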
        current[package] = dict(
            host=host,
            build=build,
            run=run,
            test=test,
            version=rendered_recipe["package"]["version"],
            name=rendered_recipe["package"]["name"],
            build_string=os.path.basename(path).split("-")[-1].split(".")[0],
        )

    return current


def generate_graph(adjacence_matrix, deptypes, whitelist):
    """
    Computes a graphviz/dot representation of the build graph

    Parameters
    ----------

        adjacence_matrix : dict
            A dictionary containing the adjacency matrix, which states the
            dependencies of each package in the build on other packages

        deptypes : list
            A list of dependence types to preserve when building the graph.  If
            empty, then preserve all.  You may set values "build", "host",
            "run" and "test", in any combination

        whitelist : str
            Regular expression matching the package names to preserve while
            building the graph


    Returns
    -------

        graph : graphviz.Digraph
            The generated graph
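
    Example
    -------

    A minimal usage sketch, assuming ``deps`` was previously computed with
    :py:func:`compute_adjencence_matrix` and that the whitelist below is just
    an illustration::

        dot = generate_graph(deps, deptypes=[], whitelist="^bob.*$")
        dot.render("dependencies")  # requires the graphviz binaries installed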

    """

    from graphviz import Digraph

    whitelist_compiled = re.compile(whitelist)
    deptypes = deptypes if deptypes else ["host", "build", "run", "test"]

    graph = Digraph()
    nodes = {}

    # generate nodes for all packages we want to track explicitly
    for package, values in adjacence_matrix.items():
        if not whitelist_compiled.match(values["name"]):
            logger.debug(
                "Skipping main package %s (did not match whitelist)",
                values["name"],
            )
            continue
        name = (
            values["name"]
            + "\n"
            + values["version"]
            + "\n"
            + values["build_string"]
        )
        nodes[values["name"]] = graph.node(
            values["name"], name, shape="box", color="blue"
        )

    # generates nodes for all dependencies
    for package, values in adjacence_matrix.items():

        # ensures we only keep the most complete dependence spec in our list
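        # (e.g. if both "python" and "python >=3.6" appear, keep "python >=3.6")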
        deps = {}
        to_consider = set()
        for k in deptypes:
            to_consider |= set(values[k])
        for dep in to_consider:
            name = dep.split()[0]
            if name not in deps or not deps[name]:
                deps[name] = dep.split()[1:]

        for ref, parts in deps.items():
            if not whitelist_compiled.match(ref):
                logger.debug(
                    "Skipping dependence %s (did not match whitelist)", ref
                )
                continue

            if ref not in nodes:
                # we do not have a node for that dependence, create it
                name = str(ref)  # new string
                if len(parts) >= 1:
                    name += "\n" + parts[0]  # dep version
                if len(parts) >= 2:
                    name += "\n" + parts[1]  # dep build
                nodes[ref] = graph.node(ref, name)

            # connects package -> dep
            graph.edge(values["name"], ref)

    return graph