From 5e281f9724a7a7faeb7659d195e88578f2c3f765 Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Fri, 15 Nov 2019 09:37:08 +0100
Subject: [PATCH] [graph] Re-implement dependence graph generation as per my
 instructions to @tiago.pereira

---
 bob/devtools/graph.py         | 314 ++++++++++++++++++++++++----------
 bob/devtools/scripts/build.py |   7 +-
 bob/devtools/scripts/graph.py | 172 ++++++++++++++++---
 bob/devtools/scripts/test.py  |   3 +
 4 files changed, 382 insertions(+), 114 deletions(-)

diff --git a/bob/devtools/graph.py b/bob/devtools/graph.py
index 70077ef2..059d0190 100644
--- a/bob/devtools/graph.py
+++ b/bob/devtools/graph.py
@@ -1,120 +1,250 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import conda.cli.python_api
-import json
+import os
+import re
+import glob
+import fnmatch
+import tempfile
+import tarfile
+from io import BytesIO
+
+from .bootstrap import set_environment
+from .build import (
+    next_build_number,
+    get_rendered_metadata,
+    get_parsed_recipe,
+    get_output_path,
+)
+from .log import get_logger
+logger = get_logger(__name__)
 
-from .log import verbosity_option, get_logger, echo_info
 
-logger = get_logger(__name__)
+def compute_adjencence_matrix(gl, package, conda_config, main_channel,
+        recurse_regexp="^(bob|beat|batl|gridtk)(\.)?(?!-).*$", current={},
+        ref="master"):
+    """
+    Given a target package, returns an adjacency matrix with its dependencies
+    returned via the conda-build API
 
+    Parameters
+    ----------
 
-from graphviz import Digraph
+    gl : object
+        Pre-instantiated instance of the gitlab server API to use, of type
+        :py:class:`gitlab.Gitlab`.
 
+    package : str
+        Name of the package, including its group in the format
+        ``group/package``
 
-def get_graphviz_dependency_graph(
-    graph_dict,
-    file_name,
-    prefix="bob.",
-    black_list=["python", "setuptools", "libcxx", "numpy", "libblitz", "boost"],
-):
-    """
-    Given a dictionary with the dependency graph, compute the graphviz DAG and save it
-    in SVG
-    """
+    conda_config : dict
+        Dictionary of conda configuration options loaded from command-line and
+        read from defaults available.
 
-    d = Digraph(format="svg", engine="dot")
+    main_channel : str
+        Main channel to consider when looking for the next build number of
+        the target package
 
-    for i in graph_dict:
-        for j in graph_dict[i]:
-            # Conections to python, setuptools....gets very messy
-            if j in black_list:
-                continue
+    recurse_regexp : str
+        Regular expression to use, for determining where to recurse for
+        resolving dependencies.  Typically, this should be set to match only
+        packages which exist in gitlab.  If it includes more than that, then
+        we may not be able to reach the package repository and an error will be
+        raised.  The default expression avoids recursing over bob/beat-devel
+        packages.
+
+    current : dict
+        Dictionary of packages already inspected - useful for recursive calls
+        to this function, to avoid potential cyclic dependencies.  Corresponds
+        to the current return value of this function.
+
+    ref : str
+        Name of the git reference (branch, tag or commit hash) to use
 
-            if prefix in j:
-                d.attr("node", shape="box")
-            else:
-                d.attr("node", shape="ellipse")
-            d.edge(i, j)
-    d.render(file_name)
 
+    Returns
+    -------
+
+    adjacence_matrix : dict
+        A dictionary that contains the dependencies of all packages considered
+        in the recursion.  The keys are the names of the packages; each value
+        is a dictionary with the dependencies (host, build, run and test) as
+        lists of strings, plus the package name, version and build string.
 
-def compute_dependency_graph(
-    package_name, channel=None, selected_packages=[], prefix="bob.", dependencies=dict()
-):
     """
-    Given a target package, returns an adjacency matrix with its dependencies returned via the command `conda search xxxx --info` 
 
-    **Parameters**
-       
-       package_name:
-          Name of the package
-       
-       channel:
-          Name of the channel to be sent via `-c` option. If None `conda search` will use what is in .condarc
+    use_package = gl.projects.get(package)
+
+    logger.info('Resolving graph for %s@%s',
+            use_package.attributes["path_with_namespace"], ref)
+    with tempfile.TemporaryDirectory() as tmpdir:
+
+        logger.debug('Downloading archive for %s...', ref)
+        archive = use_package.repository_archive(ref=ref)  #in memory
+        logger.debug("Archive has %d bytes", len(archive))
+
+        with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as f:
+            f.extractall(path=tmpdir)
+
+        # use conda-build API to figure out all dependencies
+        recipe_dir = glob.glob(os.path.join(tmpdir, '*', 'conda'))[0]
+        logger.debug('Resolving conda recipe for package at %s...', recipe_dir)
+        if not os.path.exists(recipe_dir):
+            raise RuntimeError("The conda recipe directory %s does not " \
+                    "exist" % recipe_dir)
+
+        version_candidate = os.path.join(recipe_dir, "..", "version.txt")
+        if os.path.exists(version_candidate):
+            version = open(version_candidate).read().rstrip()
+            set_environment("BOB_PACKAGE_VERSION", version)
+
+        # pre-renders the recipe - figures out the destination
+        metadata = get_rendered_metadata(recipe_dir, conda_config)
+        rendered_recipe = get_parsed_recipe(metadata)
+        path = get_output_path(metadata, conda_config)
+
+        # gets the next build number
+        build_number, _ = next_build_number(main_channel,
+                os.path.basename(path))
+
+        # at this point, all elements are parsed, I know the package version,
+        # build number and all dependencies
+
+        # host and build should have precise numbers to be used for building
+        # this package.
+        host = rendered_recipe['requirements'].get('host', [])
+        build = rendered_recipe['requirements'].get('build', [])
+
+        # run dependencies are more vague
+        run = rendered_recipe['requirements'].get('run', [])
+
+        # test dependencies even more vague
+        test = rendered_recipe.get('test', {}).get('requires', [])
+
+        # for each of the above sections, recurse in figuring out dependencies,
+        # if dependencies match the recursion regular expression
+        recurse_compiled = re.compile(recurse_regexp)
+        def _re_filter(l):
+            return [k for k in l if recurse_compiled.match(k)]
+        host_recurse = set([z.split()[0] for z in _re_filter(host)])
+        build_recurse = set([z.split()[0] for z in _re_filter(build)])
+        run_recurse = set([z.split()[0] for z in _re_filter(run)])
+        test_recurse = set([z.split()[0] for z in _re_filter(test)])
+
+        # we do not separate host/build/run/test dependencies and assume they
+        # will all be of the same version in the end.  Otherwise, we would need
+        # to do this in a bit more careful way.
+        all_recurse = host_recurse | build_recurse | run_recurse | test_recurse
+
+        # complete the package group, which is not provided by conda-build
+        def _add_default_group(p):
+            if p.startswith('bob') or p.startswith('gridtk'):
+                return '/'.join(('bob', p))
+            elif p.startswith('beat'):
+                return '/'.join(('beat', p))
+            elif p.startswith('batl'):
+                return '/'.join(('batl', p))
+            else:
+                raise RuntimeError('Do not know how to recurse to package %s' \
+                        % (p,))
+        all_recurse = set([_add_default_group(k) for k in all_recurse])
 
-       selected_packages:
-          List of target packages. If set, the returned adjacency matrix will be in terms of this list.
+        # do not recurse for packages we already know
+        all_recurse -= set(current.keys())
+        logger.debug("Recursing over the following packages: %s",
+                ", ".join(all_recurse))
 
-       prefix:
-          Only seach for deep dependencies under the prefix. This would avoid to go deeper in 
-          dependencies not maintained by us, such as, numpy, matplotlib, etc..
+        for dep in all_recurse:
+            dep_adjmtx = compute_adjencence_matrix(gl, dep, conda_config,
+                    main_channel, recurse_regexp=recurse_regexp, ref=ref)
+            current.update(dep_adjmtx)
 
-       dependencies:
-          Dictionary controlling the state of each search
+        current[package] = dict(host=host, build=build, run=run, test=test,
+                version=rendered_recipe["package"]["version"],
+                name=rendered_recipe["package"]["name"],
+                build_string=os.path.basename(path).split('-')[-1].split('.')[0])
 
+    return current
+
+
+def generate_graph(adjacence_matrix, deptypes, whitelist):
     """
+    Computes a graphviz/dot representation of the build graph
 
-    if package_name in dependencies:
-        return dependencies
+    Parameters
+    ----------
 
-    dependencies[package_name] = fetch_dependencies(
-        package_name, channel, selected_packages
-    )
-    logger.info(f"  >> Searching dependencies of {package_name}")
-    for d in dependencies[package_name]:
-        if prefix in d:
-            compute_dependency_graph(
-                d, channel, selected_packages, prefix, dependencies
-            )
-    return dependencies
+        adjacence_matrix : dict
+            A dictionary containing the adjacency matrix, which states the
+            dependencies for each package in the build, to other packages
 
+        deptypes : list
+            A list of dependence types to preserve when building the graph.  If
+            empty, then preserve all.  You may set values "build", "host",
+            "run" and "test", in any combination
 
-def fetch_dependencies(package_name, channel=None, selected_packages=[]):
-    """
-    conda search the dependencies of a package
+        whitelist : str
+            Regular expression for matching strings to preserve while building
+            the graph
+
+
+    Returns
+    -------
+
+        graph : graphviz.Digraph
+            The generated graph
 
-    **Parameters**
-        packge_name:
-        channel:
-        selected_packages:
     """
 
-    # Running conda search and returns to a json file
-    if channel is None:
-        package_description = conda.cli.python_api.run_command(
-            conda.cli.python_api.Commands.SEARCH, package_name, "--info", "--json"
-        )
-    else:
-        package_description = conda.cli.python_api.run_command(
-            conda.cli.python_api.Commands.SEARCH,
-            package_name,
-            "--info",
-            "-c",
-            channel,
-            "--json",
-        )
-
-    # TODO: Fix that
-    package_description = json.loads(package_description[0])
-
-    # Fetching the dependencies of the most updated package
-    all_dependencies = [
-        p.split(" ")[0] for p in package_description[package_name][-1]["depends"]
-    ]
-
-    if len(selected_packages) > 0:
-        # Filtering the dependencies
-        return [d for d in selected_packages if d in all_dependencies]
-
-    return all_dependencies
+    from graphviz import Digraph
+
+    whitelist_compiled = re.compile(whitelist)
+    deptypes = deptypes if deptypes else ["host", "build", "run", "test"]
+
+    graph = Digraph()
+    nodes = {}
+
+    # generate nodes for all packages we want to track explicitly
+    for package, values in adjacence_matrix.items():
+        if not whitelist_compiled.match(values["name"]):
+            logger.debug("Skipping main package %s (did not match whitelist)",
+                    values["name"])
+            continue
+        name = values["name"] + "\n" + values["version"] + "\n" \
+                + values["build_string"]
+        nodes[values["name"]] = graph.node(values["name"], name, shape="box",
+                color="blue")
+
+    # generates nodes for all dependencies
+    for package, values in adjacence_matrix.items():
+
+        # ensures we only have the most complete dependence in our list
+        deps = {}
+        to_consider = set()
+        for k in deptypes:
+            to_consider |= set(values[k])
+        for dep in to_consider:
+            name = dep.split()[0]
+            if name not in deps or (name in deps and not deps[name]):
+                deps[name] = dep.split()[1:]
+
+        for ref, parts in deps.items():
+            if not whitelist_compiled.match(ref):
+                logger.debug("Skipping dependence %s (did not match whitelist)",
+                        ref)
+                continue
+
+            if not any([k == ref for k in nodes.keys()]):
+                # we do not have a node for that dependence, create it
+                name = str(ref)  #new string
+                if len(parts) >= 1:
+                    name += "\n" + parts[0]  #dep version
+                if len(parts) >= 2:
+                    name += "\n" + parts[1]  #dep build
+                nodes[ref] = graph.node(ref, name)
+
+            # connects package -> dep
+            graph.edge(values["name"], ref)
+
+    return graph
diff --git a/bob/devtools/scripts/build.py b/bob/devtools/scripts/build.py
index 263b98f5..feac1b0a 100644
--- a/bob/devtools/scripts/build.py
+++ b/bob/devtools/scripts/build.py
@@ -20,7 +20,10 @@ from ..build import (
     get_docserver_setup,
     get_env_directory,
     get_output_path,
+    remove_conda_loggers,
 )
+remove_conda_loggers()
+
 from ..constants import (
     CONDA_BUILD_CONFIG,
     CONDA_RECIPE_APPEND,
@@ -182,7 +185,7 @@ def build(
     from bob.devtools.bootstrap import do_hack
     project_dir = os.path.dirname(recipe_dir[0])
     do_hack(project_dir)
-    
+
 
     # get potential channel upload and other auxiliary channels
     channels = get_channels(
@@ -235,7 +238,7 @@ def build(
     for d in recipe_dir:
 
         if not os.path.exists(d):
-            raise RuntimeError("The directory %s does not exist" % recipe_dir)
+            raise RuntimeError("The directory %s does not exist" % d)
 
         version_candidate = os.path.join(d, "..", "version.txt")
         if os.path.exists(version_candidate):
diff --git a/bob/devtools/scripts/graph.py b/bob/devtools/scripts/graph.py
index 3415eacb..ffc790c4 100644
--- a/bob/devtools/scripts/graph.py
+++ b/bob/devtools/scripts/graph.py
@@ -1,45 +1,177 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import sys
+
+import yaml
 import click
 from click_plugins import with_plugins
 
-from ..graph import compute_dependency_graph, get_graphviz_dependency_graph
+from . import bdt
+from ..constants import (
+    CONDA_BUILD_CONFIG,
+    CONDA_RECIPE_APPEND,
+    SERVER,
+    MATPLOTLIB_RCDIR,
+    BASE_CONDARC,
+)
+from ..build import make_conda_config
+from ..bootstrap import set_environment, get_channels
+from ..release import get_gitlab_instance
+from ..graph import compute_adjencence_matrix, generate_graph
 
 from ..log import verbosity_option, get_logger, echo_info
-
 logger = get_logger(__name__)
 
 
 @click.command(
     epilog="""
-Example:
+Examples:
 
-   bdt graph bob.bio.face graph
+  1. Draws the graph of a package
+
+     $ bdt gitlab graph bob/bob.bio.face
 
 
 """
 )
-@click.argument("package_name", required=True)
-@click.argument("output_file", required=True)
+@click.argument("package", required=True)
 @click.option(
-    "-c",
-    "--channel",
-    default=None,
-    help="Define a target channel for conda serch. If not set, will use what is set in .condarc",
+    "-p",
+    "--python",
+    default=("%d.%d" % sys.version_info[:2]),
+    show_default=True,
+    help="Version of python to build the environment for",
 )
 @click.option(
-    "-p",
-    "--prefix",
-    default="bob.",
-    help="It will recursivelly look into dependencies whose package name matches the prefix. Default 'bob.'",
+    "-r",
+    "--condarc",
+    help="Use custom conda configuration file instead of our own",
+)
+@click.option(
+    "-m",
+    "--config",
+    "--variant-config-files",
+    show_default=True,
+    default=CONDA_BUILD_CONFIG,
+    help="overwrites the path leading to " "variant configuration file to use",
+)
+@click.option(
+    "-a",
+    "--append-file",
+    show_default=True,
+    default=CONDA_RECIPE_APPEND,
+    help="overwrites the path leading to " "appended configuration file to use",
+)
+@click.option(
+    "-S",
+    "--server",
+    show_default=True,
+    default=SERVER,
+    help="Server used for downloading conda packages and documentation "
+    "indexes of required packages",
+)
+@click.option(
+    "-P",
+    "--private/--no-private",
+    default=False,
+    help="Set this to **include** private channels on your search - "
+    "you **must** be at Idiap to execute this build in this case - "
+    "you **must** also use the correct server name through --server - "
+    "notice this option has no effect to conda if you also pass --condarc",
+)
+@click.option(
+    "-X",
+    "--stable/--no-stable",
+    default=False,
+    help="Set this to **exclude** beta channels from your build - "
+    "notice this option has no effect if you also pass --condarc",
+)
+@click.option(
+    "-C",
+    "--ci/--no-ci",
+    default=False,
+    hidden=True,
+    help="Use this flag to indicate the graph will be running on the CI",
+)
+@click.option(
+    "-n",
+    "--name",
+    show_default=True,
+    default="graph",
+    help="set the graph name",
+)
+@click.option(
+    "-f",
+    "--format",
+    show_default=True,
+    default="svg",
+    help="determines the type of output to expect",
 )
+@click.option(
+    "-w",
+    "--whitelist",
+    show_default=True,
+    default="^(bob|beat|batl|gridtk)(\.)?(?!-).*$",
+    help="package regular expression to preserve in the graph, "
+    "use .* for keeping all packages, including non-maintained ones.  The "
+    "current expression accepts most of our packages, excluding bob/beat-devel")
 @verbosity_option()
-def graph(package_name, output_file, channel, prefix):
+@bdt.raise_on_error
+def graph(package, python, condarc, config, append_file, server, private,
+        stable, ci, name, format, whitelist):
     """
-    Compute the dependency graph of a conda package and save it in an SVG file using graphviz.
+    Computes the dependency graph of a gitlab package (via its conda recipe)
+    and uses graphviz to render a directed acyclic graph (DAG) of package
+    dependencies to a file in the requested format.
+
+    This command uses the conda-build API to resolve the package dependencies.
     """
-    logger.info(f"Computing dependency graph")
-    graph_dict = compute_dependency_graph(package_name, channel=channel, prefix=prefix)
-    logger.info("Generating SVG")
-    get_graphviz_dependency_graph(graph_dict, output_file, prefix=prefix)
+
+    if "/" not in package:
+        raise RuntimeError('PACKAGE should be specified as "group/name"')
+
+    package_group, package_name = package.split('/', 1)
+
+    gl = get_gitlab_instance()
+
+    # get potential channel upload and other auxiliary channels
+    channels = get_channels(
+        public=(not private),
+        stable=stable,
+        server=server,
+        intranet=ci,
+        group=package_group,
+    )
+
+    if condarc is not None:
+        logger.info("Loading CONDARC file from %s...", condarc)
+        with open(condarc, "rb") as f:
+            condarc_options = yaml.load(f, Loader=yaml.FullLoader)
+    else:
+        # use default and add channels
+        all_channels = []
+        all_channels += channels + ["defaults"]
+        condarc_options = yaml.load(BASE_CONDARC, Loader=yaml.FullLoader)
+        logger.info(
+            "Using the following channels during build:\n  - %s",
+            "\n  - ".join(all_channels),
+        )
+        condarc_options["channels"] = all_channels
+
+    conda_config = make_conda_config(
+        config, python, append_file, condarc_options
+    )
+
+    set_environment("MATPLOTLIBRC", MATPLOTLIB_RCDIR)
+
+    # setup BOB_DOCUMENTATION_SERVER environment variable (used for bob.extension
+    # and derived documentation building via Sphinx)
+    set_environment("DOCSERVER", server)
+    set_environment("BOB_DOCUMENTATION_SERVER", "/not/set")
+
+    adj_matrix = compute_adjencence_matrix(gl, package, conda_config,
+            channels[0])
+
+    graph = generate_graph(adj_matrix, deptypes=[], whitelist=whitelist)
+    graph.render(name, format=format, cleanup=True)
diff --git a/bob/devtools/scripts/test.py b/bob/devtools/scripts/test.py
index d1f28ae9..b000ca8a 100644
--- a/bob/devtools/scripts/test.py
+++ b/bob/devtools/scripts/test.py
@@ -15,7 +15,10 @@ from ..build import (
     make_conda_config,
     get_docserver_setup,
     get_env_directory,
+    remove_conda_loggers,
 )
+remove_conda_loggers()
+
 from ..constants import (
     CONDA_BUILD_CONFIG,
     CONDA_RECIPE_APPEND,
-- 
GitLab