diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 77ca97b6a25ae52f0d3b8e5268fa9ca48e86d01f..3ac424d9886061339b072ee9484bf3af578a6d75 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -24,8 +24,6 @@ stages: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt .build_linux_template: @@ -107,8 +105,6 @@ build_macosx_37: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt deploy_beta: @@ -150,5 +146,3 @@ pypi: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt diff --git a/bob/devtools/bootstrap.py b/bob/devtools/bootstrap.py index f658ee8731e2d821881dba05f5fa75acb4fe5e5e..a79709c8af85332405a4a04ec6f836122336a8c1 100644 --- a/bob/devtools/bootstrap.py +++ b/bob/devtools/bootstrap.py @@ -46,6 +46,62 @@ import logging logger = logging.getLogger(__name__) +def do_hack(project_dir): + """ + This function is intended for temporary use only. + + It implements hacks for the issues: https://gitlab.idiap.ch/bob/bob.devtools/merge_requests/112 + and https://github.com/conda/conda-build/issues/3767 + + """ + + #### HACK to avoid ripgrep ignoring bin/ directories in our checkouts + import shutil + + git_ignore_file = os.path.join(project_dir, ".gitignore") + if os.path.exists(git_ignore_file): + logger.warning('Removing ".gitignore" to overcome issues with ripgrep') + logger.warning( + "See https://gitlab.idiap.ch/bob/bob.devtools/merge_requests/112" + ) + os.unlink(git_ignore_file) + #### END OF HACK + + #### HACK that avoids this issue: https://github.com/conda/conda-build/issues/3767 + license_file = os.path.join(project_dir, "LICENSE") + if not os.path.exists(license_file): + license_file = os.path.join(project_dir, "LICENSE.AGPL") + + recipe_dir = os.path.join(project_dir, "conda") + if os.path.exists(license_file) and os.path.exists(recipe_dir): + logger.warning( + "Copying LICENSE file to `./conda` dir to avoid issue with conda build (https://github.com/conda/conda-build/issues/3767)" ) + logger.warning( + "Replacing ../LICENSE with LICENSE (https://github.com/conda/conda-build/issues/3767)" ) + shutil.copyfile( + license_file, + os.path.join(recipe_dir, os.path.basename(license_file)), + ) + + # Checking COPYING file just in case + copying_file = os.path.join(project_dir, "COPYING") + if os.path.exists(copying_file): + shutil.copyfile(copying_file, os.path.join(recipe_dir, "COPYING")) + + meta_file = os.path.join(recipe_dir, "meta.yaml") + recipe = open(meta_file).readlines() + recipe = [ + l.replace("../COPYING", "COPYING") + .replace("../LICENSE", "LICENSE") + .replace("../LICENSE.AGPL", "LICENSE.AGPL") + for l in recipe + ] + open(meta_file, "wt").write("".join(recipe)) + #### END OF HACK + + def set_environment(name, value, env=os.environ): """Function to setup the environment variable and print debug message. @@ -208,44 +264,47 @@ def ensure_miniconda_sh(): installer.
""" - server = "repo.continuum.io" # https - # WARNING: if you update this version, remember to update hahes below - path = "/miniconda/Miniconda3-4.6.14-%s-x86_64.sh" + path = "/miniconda/Miniconda3-4.7.12-%s-x86_64.sh" if platform.system() == "Darwin": - md5sum = 'ffa5f0eead5576fb26b7e6902f5eed09' + md5sum = "677f38d5ab7e1ce4fef134068e3bd76a" path = path % "MacOSX" else: - md5sum = '718259965f234088d785cad1fbd7de03' + md5sum = "0dba759b8ecfc8948f626fa18785e3d8" path = path % "Linux" if os.path.exists("miniconda.sh"): logger.info("(check) miniconda.sh md5sum (== %s?)", md5sum) import hashlib + actual_md5 = hashlib.md5(open("miniconda.sh", "rb").read()).hexdigest() if actual_md5 == md5sum: logger.info("Re-using cached miniconda3 installer (hash matches)") return else: - logger.info("Erasing cached miniconda3 installer (%s does NOT " \ - "match)", actual_md5) + logger.info( + "Erasing cached miniconda3 installer (%s does NOT " "match)", + actual_md5, + ) os.unlink("miniconda.sh") # re-downloads installer import http.client - logger.info("Connecting to https://%s...", server) - conn = http.client.HTTPSConnection(server) + server = "www.idiap.ch" # http + + logger.info("Connecting to http://%s...", server) + conn = http.client.HTTPConnection(server) conn.request("GET", path) r1 = conn.getresponse() assert r1.status == 200, ( - "Request for https://%s%s - returned status %d " + "Request for http://%s%s - returned status %d " "(%s)" % (server, path, r1.status, r1.reason) ) dst = "miniconda.sh" - logger.info("(download) https://%s%s -> %s...", server, path, dst) + logger.info("(download) http://%s%s -> %s...", server, path, dst) with open(dst, "wb") as f: f.write(r1.read()) @@ -280,8 +339,8 @@ def install_miniconda(prefix, name): def get_channels(public, stable, server, intranet, group): """Returns the relevant conda channels to consider if building project. - The subset of channels to be returned depends on the visibility and stability - of the package being built. Here are the rules: + The subset of channels to be returned depends on the visibility and + stability of the package being built. Here are the rules: * public and stable: only returns the public stable channel(s) * public and not stable: returns both public stable and beta channels @@ -301,9 +360,10 @@ def get_channels(public, stable, server, intranet, group): server: The base address of the server containing our conda channels intranet: Boolean indicating if we should add "private"/"public" prefixes on the conda paths - group: The group of packages (gitlab namespace) the package we're compiling - is part of. Values should match URL namespaces currently available on - our internal webserver. Currently, only "bob" or "beat" will work. + group: The group of packages (gitlab namespace) the package we're + compiling is part of. Values should match URL namespaces currently + available on our internal webserver. Currently, only "bob" or "beat" + will work. Returns: a list of channels that need to be considered. @@ -431,6 +491,12 @@ if __name__ == "__main__": setup_logger(logger, args.verbose) + # Run conda-build hacks + # TODO: Remove this hack as soon as possible + do_hack(".") + + condarc = os.path.join(args.conda_root, "condarc") + install_miniconda(args.conda_root, args.name) conda_bin = os.path.join(args.conda_root, "bin", "conda") @@ -442,13 +508,15 @@ if __name__ == "__main__": # http://www.idiap.ch/software/bob/defaults with so it is optimized for # a CI build. Notice we consider this script is only executed in this # context. 
The URL should NOT work outside of Idiap's network. - f.write(_BASE_CONDARC.replace( - 'https://repo.anaconda.com/pkgs/main', - 'http://www.idiap.ch/defaults', - )) + f.write( + _BASE_CONDARC.replace( + "https://repo.anaconda.com/pkgs/main", + "http://www.idiap.ch/defaults", + ) + ) conda_version = "4" - conda_build_version = "3.16" + conda_build_version = "3" conda_verify_version = "3" conda_verbosity = [] diff --git a/bob/devtools/build.py b/bob/devtools/build.py index 1ad84e52868c1259681b57adf68d3a5163a7a412..81a17b07a6f4b6a65b2d7f072c85eb0613a82e3b 100644 --- a/bob/devtools/build.py +++ b/bob/devtools/build.py @@ -491,13 +491,9 @@ def git_clean_build(runner, verbose): # glob wild card entries we'd like to keep exclude_from_cleanup = [ "miniconda.sh", # the installer, cached - "miniconda/pkgs/urls.txt", # download index, cached "sphinx", # build artifact -- documentation ] - # cache - exclude_from_cleanup += ["miniconda/pkgs/"] - # artifacts exclude_from_cleanup += ["miniconda/conda-bld/"] exclude_from_cleanup += glob.glob("dist/*.zip") diff --git a/bob/devtools/data/conda_build_config.yaml b/bob/devtools/data/conda_build_config.yaml index 687f865689962fada9d0415bc27273963241b727..6946af980ee60bb08075b245ffc467f8431217fd 100644 --- a/bob/devtools/data/conda_build_config.yaml +++ b/bob/devtools/data/conda_build_config.yaml @@ -51,12 +51,12 @@ zip_keys: # This version of bob-devel will be used at test time of packages: bob_devel: - - 2019.09.05 + - 2019.10.17 # This version of beat-devel will be used at test time of packages. Notice it # uses bob-devel and should have a version that is greater or equal its value beat_devel: - - 2019.09.11 + - 2019.10.17 # The build time only dependencies (build requirements). # Updating these to the latest version all the time is OK and a good idea. 
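Note: the channel-selection rules spelled out in the get_channels() docstring above reduce to roughly the sketch below. The helper name (select_channels) and the exact URL layout are illustrative assumptions only, not the actual implementation, which lives in bob/devtools/bootstrap.py.

.. code-block:: python

   def select_channels(public, stable, server, intranet, group):
       # a sketch under assumptions: public packages live under
       # /software/<group> ("bob" or "beat"); private ones under a
       # restricted prefix; the intranet flag is ignored here
       prefix = ("/software/" + group) if public else "/private"
       channels = []
       if not stable:
           # non-stable builds also consult the beta channel, first
           channels.append(server + prefix + "/conda/label/beta")
       channels.append(server + prefix + "/conda")
       return channels

For example, select_channels(True, False, "http://www.idiap.ch", False, "bob") would return the beta and stable public channels, in that order.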
diff --git a/bob/devtools/data/gitlab-ci/base-build.yaml b/bob/devtools/data/gitlab-ci/base-build.yaml index 4b97b9cf0b4ca9d7e71ae5aec2690d06756874b5..3007c5e327fb453c28f4bbff6b4d0c793948e516 100644 --- a/bob/devtools/data/gitlab-ci/base-build.yaml +++ b/bob/devtools/data/gitlab-ci/base-build.yaml @@ -30,8 +30,6 @@ stages: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt build_linux: diff --git a/bob/devtools/data/gitlab-ci/docs.yaml b/bob/devtools/data/gitlab-ci/docs.yaml index 608ebdc83180ddc620ecef633f134bf1e5705182..79f16f40c2ea8b2b640d75060922f65f470e6041 100644 --- a/bob/devtools/data/gitlab-ci/docs.yaml +++ b/bob/devtools/data/gitlab-ci/docs.yaml @@ -25,8 +25,6 @@ stages: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt # Build target diff --git a/bob/devtools/data/gitlab-ci/nightlies.yaml b/bob/devtools/data/gitlab-ci/nightlies.yaml index e18e674f4a46ad2714cf1eb865ab781c10fef9bb..ed9ba7edadbece9523407189cce26a115df29345 100644 --- a/bob/devtools/data/gitlab-ci/nightlies.yaml +++ b/bob/devtools/data/gitlab-ci/nightlies.yaml @@ -24,8 +24,6 @@ stages: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt # Build targets diff --git a/bob/devtools/data/gitlab-ci/single-package.yaml b/bob/devtools/data/gitlab-ci/single-package.yaml index 3ded4f6f5985dcc37b4d64c81754d882ddf5d7ce..03e8e72bbbc43de3c0176f5b298b7e8085706357 100644 --- a/bob/devtools/data/gitlab-ci/single-package.yaml +++ b/bob/devtools/data/gitlab-ci/single-package.yaml @@ -26,8 +26,6 @@ stages: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt # Build targets @@ -109,8 +107,6 @@ build_linux_37: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt .test_linux_template: @@ -143,8 +139,6 @@ build_linux_37: cache: paths: - miniconda.sh - - ${CONDA_ROOT}/pkgs/*.tar.bz2 - - ${CONDA_ROOT}/pkgs/urls.txt cache: key: "deploy" diff --git a/bob/devtools/mirror.py b/bob/devtools/mirror.py new file mode 100644 index 0000000000000000000000000000000000000000..a1055c1223118578a2008a04797d579045b80995 --- /dev/null +++ b/bob/devtools/mirror.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + + +'''Mirroring functionality for conda channels + +Some constructs are bluntly copied from +https://github.com/valassis-digital-media/conda-mirror +''' + +import os +import bz2 +import json +import time +import random +import hashlib +import fnmatch +import tempfile + +import requests + +from .log import get_logger +logger = get_logger(__name__) + + + +def _download(url, target_directory): + """Download `url` to `target_directory` + + Parameters + ---------- + url : str + The url to download + target_directory : str + The path to a directory where `url` should be downloaded + + Returns + ------- + file_size: int + The size in bytes of the file that was downloaded + """ + + file_size = 0 + chunk_size = 1024 # 1KB chunks + logger.info("Download %s -> %s", url, target_directory) + # create a temporary file + target_filename = url.split('/')[-1] + download_filename = os.path.join(target_directory, target_filename) + with open(download_filename, 'w+b') as tf: + ret = requests.get(url, stream=True) + size = ret.headers.get('Content-length', '??') + logger.debug('Saving to %s (%s bytes)', download_filename, size) + for data in ret.iter_content(chunk_size): + tf.write(data) + file_size = os.path.getsize(download_filename) + 
return file_size + + +def _list_conda_packages(local_dir): + """List the conda packages (*.tar.bz2 or *.conda files) in `local_dir` + + Parameters + ---------- + local_dir : str + Some local directory with (hopefully) some conda packages in it + + Returns + ------- + list + List of conda packages in `local_dir` + """ + contents = os.listdir(local_dir) + return fnmatch.filter(contents, "*.conda") + \ + fnmatch.filter(contents, "*.tar.bz2") + + +def get_json(channel, platform, name): + """Get a JSON file for a channel/platform combo from a conda channel + + Parameters + ---------- + channel : str + Complete channel URL + platform : {'linux-64', 'osx-64', 'noarch'} + The platform of interest + name : str + The name of the file to retrieve. If the name ends in '.bz2', then it + is auto-decompressed + + Returns + ------- + data : dict + Decoded contents of the named JSON file (e.g. repodata.json) + """ + + url = channel + '/' + platform + '/' + name + logger.debug('[checking] %s...', url) + r = requests.get(url, allow_redirects=True, stream=True) + size = r.headers.get('Content-length', '??') + logger.info('[download] %s (%s bytes)...', url, size) + + if name.endswith('.bz2'): + # just in case transport encoding was applied + r.raw.decode_content = True + data = bz2.decompress(r.raw.read()) + return json.loads(data) + + # else, just decodes the response + return r.json() + + +def get_local_contents(path, arch): + """Returns the local package contents as a set""" + + path_arch = os.path.join(path, arch) + if not os.path.exists(path_arch): + return set() + + # path exists, lists currently available packages + logger.info('Listing package contents of %s...', path_arch) + contents = os.listdir(path_arch) + return set(fnmatch.filter(contents, '*.tar.bz2') + + fnmatch.filter(contents, '*.conda')) + + +def load_glob_list(path): + """Loads a list of globs from a configuration file + + Excludes comments and empty lines + """ + + retval = [str(k.strip()) for k in open(path, "rt")] + return [k for k in retval if k and k[0] not in ("#", "-")] + + +def blacklist_filter(packages, globs): + """Filters **out** the input package set with the glob list""" + + to_remove = set() + for k in globs: + to_remove |= set(fnmatch.filter(packages, k)) + return packages - to_remove + + +def whitelist_filter(packages, globs): + """Filters **in** the input package set with the glob list""" + + to_keep = set() + for k in globs: + to_keep |= set(fnmatch.filter(packages, k)) + return to_keep + + +def _sha256sum(filename): + """Calculates and returns the sha-256 sum given a file name""" + + h = hashlib.sha256() + b = bytearray(128*1024) + mv = memoryview(b) + with open(filename, 'rb', buffering=0) as f: + for n in iter(lambda : f.readinto(mv), 0): + h.update(mv[:n]) + return h.hexdigest() + + +def _md5sum(filename): + """Calculates and returns the md5 sum given a file name""" + + h = hashlib.md5() + b = bytearray(128*1024) + mv = memoryview(b) + with open(filename, 'rb', buffering=0) as f: + for n in iter(lambda : f.readinto(mv), 0): + h.update(mv[:n]) + return h.hexdigest() + + +def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run): + """Downloads remote packages to a download directory + + Packages are downloaded first to a temporary directory, then validated + according to the expected sha256/md5 sum and then moved, one by one, to the + destination directory. An error is raised if the package cannot be + correctly downloaded.
+ + Parameters + ---------- + packages : list of str + List of packages to download from the remote channel + repodata: dict + A dictionary containing the remote repodata.json contents + channel_url: str + The complete channel URL + dest_dir: str + The local directory where the channel is being mirrored + arch: str + The current architecture which we are mirroring + dry_run: bool + A boolean flag indicating if this is just a dry-run (simulation); + if set to ``True``, nothing is actually downloaded or moved. + + """ + + # download files into temporary directory, that is removed by the end of + # the procedure, or if something bad occurs + with tempfile.TemporaryDirectory() as download_dir: + + total = len(packages) + for k, p in enumerate(packages): + + k+=1 #adjust to produce correct order on printouts + + # checksum to verify + if p.endswith('.tar.bz2'): + expected_hash = repodata['packages'][p].get('sha256', + repodata['packages'][p]['md5']) + else: + expected_hash = repodata['packages.conda'][p].get('sha256', + repodata['packages.conda'][p]['md5']) + + # download package to file in our temporary directory + url = channel_url + '/' + arch + '/' + p + temp_dest = os.path.join(download_dir, p) + logger.info('[download: %d/%d] %s -> %s', k, total, url, temp_dest) + + package_retries = 10 + while package_retries: + + if not dry_run: + logger.debug('[checking: %d/%d] %s', k, total, url) + r = requests.get(url, stream=True, allow_redirects=True) + size = r.headers.get('Content-length', '??') + logger.info('[download: %d/%d] %s -> %s (%s bytes)', k, + total, url, temp_dest, size) + open(temp_dest, 'wb').write(r.raw.read()) + + # verify that checksum matches + if len(expected_hash) == 32: #md5 + logger.info('[verify: %d/%d] md5(%s) == %s?', k, total, + temp_dest, expected_hash) + else: #sha256 + logger.info('[verify: %d/%d] sha256(%s) == %s?', k, total, + temp_dest, expected_hash) + + if dry_run: + # nothing was downloaded in a dry-run, so there is + # nothing to verify or retry + break + + if not dry_run: + if len(expected_hash) == 32: #md5 + actual_hash = _md5sum(temp_dest) + else: #sha256 + actual_hash = _sha256sum(temp_dest) + + if actual_hash != expected_hash: + wait_time = random.randint(10,61) + logger.warning('Checksum of locally downloaded ' \ + 'version of %s does not match ' \ + '(actual:%r != %r:expected) - retrying ' \ + 'after %d seconds', url, actual_hash, + expected_hash, wait_time) + os.unlink(temp_dest) + time.sleep(wait_time) + package_retries -= 1 + continue + else: + break + + # final check, before we continue + if not dry_run: + assert actual_hash == expected_hash, 'Checksum of locally ' \ + 'downloaded version of %s does not match ' \ + '(actual:%r != %r:expected)' % (url, actual_hash, + expected_hash) + + # move + local_dest = os.path.join(dest_dir, arch, p) + logger.info('[move: %d/%d] %s -> %s', k, total, temp_dest, + local_dest) + + # check local directory is available before moving + dirname = os.path.dirname(local_dest) + if not os.path.exists(dirname): + logger.info('[mkdir] %s', dirname) + if not dry_run: + os.makedirs(dirname) + + if not dry_run: + os.rename(temp_dest, local_dest) + + +def remove_packages(packages, dest_dir, arch, dry_run): + """Removes local packages that no longer matter""" + + total = len(packages) + for k, p in enumerate(packages): + k+=1 #adjust to produce correct order on printouts + path = os.path.join(dest_dir, arch, p) + logger.info('[remove: %d/%d] %s', k, total, path) + if not dry_run: + os.unlink(path) + + +def _cleanup_json(data, packages): + """Cleans-up the contents of conda JSON looking at existing packages""" + + # only keys to clean-up here, other keys
remain unchanged + for key in ('packages', 'packages.conda'): + if key not in data: continue + data[key] = dict((k,v) for k,v in data[key].items() if k in packages) + + return data + + +def _save_json(data, dest_dir, arch, name, dry_run): + """Saves contents of conda JSON""" + + destfile = os.path.join(dest_dir, arch, name) + if not dry_run: + with open(destfile, 'w') as outfile: + json.dump(data, outfile, ensure_ascii=True, indent=2) + return destfile + + +def copy_and_clean_json(url, dest_dir, arch, name, dry_run): + """Copies and cleans conda JSON file""" + + data = get_json(url, arch, name) + packages = get_local_contents(dest_dir, arch) + data = _cleanup_json(data, packages) + return _save_json(data, dest_dir, arch, name, dry_run) + + +def copy_and_clean_patch(url, dest_dir, arch, name, dry_run): + """Copies and cleans conda patch_instructions JSON file""" + + data = get_json(url, arch, name) + packages = get_local_contents(dest_dir, arch) + data = _cleanup_json(data, packages) + + # cleanup specific patch_instructions.json fields + for key in ["remove", "revoke"]: + data[key] = [k for k in data[key] if k in packages] + + return _save_json(data, dest_dir, arch, name, dry_run) + + +def checksum_packages(repodata, dest_dir, arch, packages): + """Checksums packages on the local mirror and compares them to the remote repository + + Parameters + ---------- + repodata : dict + Data loaded from `repodata.json` on the remote repository + dest_dir : str + Path leading to local mirror + arch : str + Current architecture being considered (e.g. noarch, linux-64 or osx-64) + packages : list + List of packages that are available locally, by name + + Returns + ------- + issues : set + Names of the packages whose local checksums do **not** match the + remote repository description + """ + + issues = set() + total = len(packages) + for k, p in enumerate(packages): + + k += 1 #adjust to produce correct order on printouts + + path_to_package = os.path.join(dest_dir, arch, p) + + # checksum to verify + if p.endswith('.tar.bz2'): + expected_hash = repodata['packages'][p].get('sha256', + repodata['packages'][p]['md5']) + else: + expected_hash = repodata['packages.conda'][p].get('sha256', + repodata['packages.conda'][p]['md5']) + + # verify that checksum matches + if len(expected_hash) == 32: #md5 + logger.debug('[verify: %d/%d] md5(%s) == %s?', k, total, + path_to_package, expected_hash) + else: #sha256 + logger.debug('[verify: %d/%d] sha256(%s) == %s?', k, total, + path_to_package, expected_hash) + + if len(expected_hash) == 32: #md5 + actual_hash = _md5sum(path_to_package) + else: #sha256 + actual_hash = _sha256sum(path_to_package) + + if actual_hash != expected_hash: + logger.warning('Checksum of %s does not match remote ' \ + 'repository description (actual:%r != %r:expected)', + path_to_package, actual_hash, expected_hash) + issues.add(p) + + return issues diff --git a/bob/devtools/pipelines.py b/bob/devtools/pipelines.py new file mode 100644 index 0000000000000000000000000000000000000000..29ecf8b51f3b7de0efaefad0bcb9fbcc3fb47017 --- /dev/null +++ b/bob/devtools/pipelines.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Pipeline utilities""" + +from tabulate import tabulate +import re +from datetime import datetime + + +def process_log(log): + """ + Summarizes the execution time of a pipeline given its job log + """ + + current_package = None + logs = dict() + dates = [] + for l in log: + + # Check which package we are building + if len(re.findall("Building bob/[a-z]*", l)) > 0: + if current_package is not None: + logs[current_package] = dates + dates = [] + + current_package = l[9:-1] + continue + + #
Checking the date + date = re.findall( + "[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2} [0-9]{2,2}:[0-9]{2,2}:[0-9]{2,2}", l + ) + if len(date) > 0: + # logs[date[0]]=current_package + dates.append(date[0]) + + ## Last log + if len(dates) > 0: + logs[current_package] = dates + + table = [] + for k in logs: + first = datetime.strptime(logs[k][0], "%Y-%m-%d %H:%M:%S") + last = datetime.strptime(logs[k][-1], "%Y-%m-%d %H:%M:%S") + delta = ((last - first).seconds) / 60 + + table.append([str(k), str(first), str(round(delta, 2)) + "m"]) + + print(tabulate(table)) diff --git a/bob/devtools/scripts/build.py b/bob/devtools/scripts/build.py index 193b8e596a93f9fc8cda03d538c55d26e91f92d9..263b98f58838f49e36e32f94a77e0dd924a4c584 100644 --- a/bob/devtools/scripts/build.py +++ b/bob/devtools/scripts/build.py @@ -177,6 +177,13 @@ def build( group, ) + #### HACK to avoid ripgrep ignoring bin/ directories in our checkouts + # TODO: Remove this hack as soon as possible + from bob.devtools.bootstrap import do_hack + project_dir = os.path.dirname(recipe_dir[0]) + do_hack(project_dir) + + # get potential channel upload and other auxiliary channels channels = get_channels( public=(not private), diff --git a/bob/devtools/scripts/mirror.py b/bob/devtools/scripts/mirror.py new file mode 100644 index 0000000000000000000000000000000000000000..5a3034c0d141108f60aa4d1ee4e93feb23705134 --- /dev/null +++ b/bob/devtools/scripts/mirror.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + + +import os +import click +import tempfile + +import conda_build.api + +from . import bdt +from ..mirror import ( + get_json, + get_local_contents, + load_glob_list, + blacklist_filter, + whitelist_filter, + download_packages, + remove_packages, + copy_and_clean_patch, + checksum_packages, + ) +from ..log import verbosity_option, get_logger, echo_info, echo_warning + +logger = get_logger(__name__) + + +@click.command( + epilog=""" +Examples: + + 1. Mirrors a conda channel: + +\b + $ bdt mirror -vv https://www.idiap.ch/software/bob/label/beta + + """ +) +@click.argument( + "channel-url", + required=True, +) +@click.argument( + "dest-dir", + type=click.Path(exists=False, dir_okay=True, file_okay=False, + writable=True, readable=True, resolve_path=True), + required=True, +) +@click.option( + "-b", + "--blacklist", + type=click.Path(exists=True, dir_okay=False, file_okay=True, + readable=True, resolve_path=True), + help="A file containing a list of globs to exclude from local " \ + "mirroring, one per line", +) +@click.option( + "-w", + "--whitelist", + type=click.Path(exists=True, dir_okay=False, file_okay=True, + readable=True, resolve_path=True), + help="A file containing a list of globs to include at local " \ + "mirroring, one per line. This is considered *after* " \ + "the blacklisting. It is here just for testing purposes", +) +@click.option( + "-m", + "--check-md5/--no-check-md5", + default=False, + help="If set, then check MD5 sums of all packages during conda-index", +) +@click.option( + "-d", + "--dry-run/--no-dry-run", + default=False, + help="Only goes through the actions, but does not execute them " + "(combine with the verbosity flags - e.g. 
``-vvv``) to enable " + "printing to help you understand what will be done", +) +@click.option( + "-t", + "--tmpdir", + type=click.Path(exists=True, dir_okay=True, file_okay=False, + readable=True, writable=True, resolve_path=True), + help="A directory where temporary files will be stored", +) +@click.option( + "-p", + "--patch/--no-patch", + default=False, + help="If set, then consider we are mirroring the defaults channel " + "where a patch_instructions.json exists and must be downloaded and " + "pruned so the mirror works adequately", +) +@click.option( + "-c", + "--checksum/--no-checksum", + default=False, + help="If set, then packages that are supposed to be kept locally " + "will be checksummed against the remote repository repodata.json " + "expectations. Errors will be reported and packages will be " + "removed from the local repository", +) +@verbosity_option() +@bdt.raise_on_error +def mirror( + channel_url, + dest_dir, + blacklist, + whitelist, + check_md5, + dry_run, + tmpdir, + patch, + checksum, + ): + """Mirrors a conda channel to a particular local destination + + This command is capable of completely mirroring a valid conda channel, + excluding packages that you may not be interested in via globs. It works + to minimize channel usage by first downloading the channel repository data + (in compressed format), analysing what is available locally and what is + available on the channel, and only downloading the missing files. + """ + + # creates a self-destructing temporary directory that will act as temporary + # directory for the rest of this program + tmpdir2 = tempfile.TemporaryDirectory(prefix='bdt-mirror-tmp', dir=tmpdir) + tempfile.tempdir = tmpdir2.name + os.environ['TMPDIR'] = tmpdir2.name + logger.info('Setting $TMPDIR and `tempfile.tempdir` to %s', tmpdir2.name) + + # if we are in a dry-run mode, let's let it be known + if dry_run: + logger.warn("!!!! DRY RUN MODE !!!!") + logger.warn("Nothing will be really mirrored") + + DEFAULT_SUBDIRS = ['noarch', 'linux-64', 'osx-64'] + + noarch = os.path.join(dest_dir, 'noarch') + if not os.path.exists(noarch): #first time + # calls conda index to create basic infrastructure + logger.info("Creating conda channel at %s...", dest_dir) + if not dry_run: + conda_build.api.update_index([dest_dir], subdir=DEFAULT_SUBDIRS, + progress=False, verbose=False) + + for arch in DEFAULT_SUBDIRS: + + remote_repodata = get_json(channel_url, arch, 'repodata.json.bz2') + logger.info('%d packages available in remote index', + len(remote_repodata.get('packages', {}))) + local_packages = get_local_contents(dest_dir, arch) + logger.info('%d packages available in local mirror', len(local_packages)) + + remote_packages = set(list(remote_repodata.get('packages', {}).keys()) + + list(remote_repodata.get('packages.conda', {}).keys())) + + if blacklist is not None and os.path.exists(blacklist): + globs_to_remove = set(load_glob_list(blacklist)) + else: + globs_to_remove = set() + + # in the remote packages, subset those that need to be downloaded + # according to our own interest + to_download = blacklist_filter(remote_packages - local_packages, + globs_to_remove) + + if whitelist is not None and os.path.exists(whitelist): + globs_to_consider = set(load_glob_list(whitelist)) + to_download = whitelist_filter(to_download, globs_to_consider) + + # in the local packages, subset those that we no longer need, be it + # because they have been removed from the remote repository, or because + # we decided to blacklist them.
+ + disappeared_remotely = local_packages - remote_packages + to_keep = blacklist_filter(local_packages, globs_to_remove) + to_delete_locally = (local_packages - to_keep) | disappeared_remotely + + # execute the transaction + if checksum: + # double-check if, among packages I should keep, everything looks + # alright with respect to expected checksums from the remote repo + issues = checksum_packages(remote_repodata, dest_dir, arch, + to_keep) + if issues: + echo_warning("Detected %d packages with checksum issues - " \ + "re-downloading after erasing..." % len(issues)) + else: + echo_info("All local package checksums match expected values") + remove_packages(issues, dest_dir, arch, dry_run) + to_download |= issues + + if to_download: + download_packages(to_download, remote_repodata, channel_url, + dest_dir, arch, dry_run) + else: + echo_info("Mirror at %s/%s is up-to-date w.r.t. %s/%s. " \ + "No packages to download." % (dest_dir, arch, channel_url, + arch)) + + if to_delete_locally: + echo_warning("%d packages will be removed at %s/%s" % \ + (len(to_delete_locally), dest_dir, arch)) + remove_packages(to_delete_locally, dest_dir, arch, dry_run) + else: + echo_info("Mirror at %s/%s is up-to-date w.r.t. blacklist. " \ + "No packages to be removed." % (dest_dir, arch)) + + if patch: + # download/cleanup patch instructions, otherwise conda installs may + # go crazy. Do this before the indexing, which will use that file + # to do its magic. + patch_file = 'patch_instructions.json' + name = copy_and_clean_patch(channel_url, dest_dir, arch, + patch_file, dry_run) + echo_info("Cleaned copy of %s/%s/%s installed at %s" % + (channel_url, arch, patch_file, name)) + + # re-indexes the channel to produce a conda-compatible setup + echo_info("Re-indexing %s..." % dest_dir) + if not dry_run: + from conda_build.index import MAX_THREADS_DEFAULT + conda_build.api.update_index([dest_dir], check_md5=check_md5, + progress=True, verbose=False, subdir=DEFAULT_SUBDIRS, + threads=MAX_THREADS_DEFAULT) diff --git a/bob/devtools/scripts/pipelines.py b/bob/devtools/scripts/pipelines.py new file mode 100644 index 0000000000000000000000000000000000000000..7d21bc068ba3377df469835fd861be981e89adf3 --- /dev/null +++ b/bob/devtools/scripts/pipelines.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python + +import os + +import click +import gitlab +import urllib.request +import urllib.error + +from . import bdt +from ..release import get_gitlab_instance + +from ..log import verbosity_option, get_logger, echo_normal, echo_warning +from ..pipelines import process_log logger = get_logger(__name__) +from tabulate import tabulate + +@click.command( + epilog=""" +Examples: + + 1. Process all the jobs from a particular pipeline + + $ bdt gitlab process-pipelines bob/bob.nightlies pipelines + + 2.
Process a particular job from a pipeline + + $ bdt gitlab process-pipelines bob/bob.nightlies pipelines --job-id xxx + +""" +) +@click.argument("package") +@click.argument("pipeline") +@click.option('--job-id', default=None, help='A job id from a pipeline') +@verbosity_option() +@bdt.raise_on_error +def process_pipelines(package, pipeline, job_id): + """Processes and summarizes the jobs of a pipeline for a given PACKAGE.""" + + if "/" not in package: + raise RuntimeError('PACKAGE should be specified as "group/name"') + + gl = get_gitlab_instance() + + # we look up the gitlab package once + try: + project = gl.projects.get(package) + pipeline = project.pipelines.get(pipeline) + + jobs = [j for j in pipeline.jobs.list()] + if job_id is not None: + jobs = [j for j in jobs if int(j.attributes["id"])==int(job_id)] + + + if len(jobs) == 0: + print("Job %s not found in the pipeline %s. Use `bdt gitlab get-pipelines` to search" % (job_id, pipeline.attributes["id"])) + + # Reading log + try: + for j in jobs: + print("Pipeline %s, Job %s" % (pipeline.attributes["id"], int(j.attributes["id"]))) + web_url = j.attributes["web_url"] + "/raw" + log = str(urllib.request.urlopen(web_url).read()).split("\\n") + process_log(log) + except urllib.error.HTTPError as e: + logger.warn( + "Gitlab access error - Log %s can't be found", web_url + ) + echo_warning("%s: unknown" % (package,)) + + logger.info( + "Found gitlab project %s (id=%d)", + project.attributes["path_with_namespace"], + project.id, + ) + + except gitlab.GitlabGetError as e: + logger.warn("Gitlab access error - package %s does not exist?", package) + echo_warning("%s: unknown" % (package,)) + + +@click.command( + epilog=""" +Examples: + + 1. Get the most recent pipelines from a particular project with their corresponding job numbers + + $ bdt gitlab get-pipelines bob/bob.nightlies + + +""" +) +@click.argument("package") +@verbosity_option() +@bdt.raise_on_error +def get_pipelines(package): + """Returns the CI pipelines of a given PACKAGE.""" + + if "/" not in package: + raise RuntimeError('PACKAGE should be specified as "group/name"') + + gl = get_gitlab_instance() + + # we look up the gitlab package once + try: + project = gl.projects.get(package) + logger.info( + "Found gitlab project %s (id=%d)", + project.attributes["path_with_namespace"], + project.id, + ) + + pipelines = project.pipelines.list() + description = [["Pipeline", "Branch", "Status", "Jobs"]] + for pipeline in pipelines: + jobs = [j.attributes["id"] for j in pipeline.jobs.list()] + description.append( + [ + "%s" % pipeline.attributes["id"], + "%s" % pipeline.attributes["ref"], + "%s" % pipeline.attributes["status"], + "%s" % jobs, + ] + ) + print("Jobs from project %s" % package) + print(tabulate(description)) + + except gitlab.GitlabGetError as e: + logger.warn("Gitlab access error - package %s does not exist?", package) + echo_warning("%s: unknown" % (package,)) diff --git a/bob/devtools/templates/conda/meta.yaml b/bob/devtools/templates/conda/meta.yaml index c5f17a28111eaa3164d891f9f877999e02f8d8b6..3a8517fdf42ea69bfd0c2b476a7d34b62014ed37 100644 --- a/bob/devtools/templates/conda/meta.yaml +++ b/bob/devtools/templates/conda/meta.yaml @@ -25,6 +25,7 @@ requirements: host: - python {{ python }} - setuptools {{ setuptools }} + - bob.extension # place your other host dependencies here run: - python diff --git a/bob/devtools/templates/requirements.txt index
a85739ed615894896a53bef435746f1468b08146..089f1abe678a25e3bdd9f06da16152d3b03da86f 100644 --- a/bob/devtools/templates/requirements.txt +++ b/bob/devtools/templates/requirements.txt @@ -1,2 +1,3 @@ setuptools numpy +bob.extension \ No newline at end of file diff --git a/conda/meta.yaml b/conda/meta.yaml index 274aa6bfab5dd877d1b0af8764e17644e7ed59f7..df6141cd536413625bc1e6434408f5e7af3c74ba 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -32,7 +32,7 @@ requirements: - click >=7 - click-plugins - conda=4 - - conda-build=3.16 + - conda-build=3 - conda-verify=3 - certifi - docformatter @@ -48,9 +48,7 @@ requirements: - jinja2 - termcolor - psutil - # pin packaging temporarily until - # https://github.com/ContinuumIO/anaconda-issues/issues/11236 is fixed. - - packaging 19.0 + - tabulate test: requires: @@ -65,6 +63,7 @@ test: - bdt dumpsphinx https://docs.python.org/3/objects.inv > /dev/null - bdt create --help - bdt build --help + - bdt mirror --help - bdt rebuild --help - bdt test --help - bdt caupdate --help @@ -103,6 +102,8 @@ test: - bdt dav rmtree --help - bdt dav clean-betas --help - bdt dav upload --help + - bdt gitlab process-pipelines --help + - bdt gitlab get-pipelines --help - sphinx-build -aEW ${PREFIX}/share/doc/{{ name }}/doc sphinx - if [ -n "${CI_PROJECT_DIR}" ]; then mv sphinx "${CI_PROJECT_DIR}/"; fi diff --git a/doc/api.rst b/doc/api.rst index 0d20dd0edf411150bb4d04b7aad255585eb6c0ae..4b9a776a4b77535cb563ff196b1b6bd1c0aa8d2b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -13,6 +13,7 @@ bob.devtools.changelog bob.devtools.bootstrap bob.devtools.build + bob.devtools.mirror bob.devtools.webdav3.client diff --git a/doc/development.rst b/doc/development.rst index b6875d5da50270910af0c01d18fb86ab046d0517..14b36068b4316626757511344fa34fc5fc32c93a 100644 --- a/doc/development.rst +++ b/doc/development.rst @@ -7,8 +7,7 @@ Very often, developers are confronted with the need to clone package repositories locally and develop installation/build and runtime code. It is recommended to create isolated environments to develop new projects using conda_ and zc.buildout_. -Tools implemented in `bob.devtools` helps automate this process for |project| packages. In the following we talk about how to checkout and build one or several packages from -their git_ source and build proper isolated environments to develop them. Then we will describe how to create a new bob package from scratch and develop existing bob packages along side it. +Tools implemented in `bob.devtools` help automate this process for |project| packages. In the following we talk about how to check out and build one or several packages from their git_ source and build proper isolated environments to develop them. Then we will describe how to create a new bob package from scratch and develop existing bob packages alongside it. TLDR ==== @@ -147,6 +146,7 @@ Optionally: .. bob.devtools.local_development: Local development of existing packages ====================================== To develop existing |project| packages you need to checkout their source code and create a proper development environment using `buildout`. @@ -154,10 +154,8 @@ To develop existing |project| packages you need to checkout their source code an Checking out |project| package sources -------------------------------------- - |project| packages are developed through gitlab_. Various packages exist -in |project|'s gitlab_ instance. Here as an example we assume you want to install and build locally the `bob.blitz` pakcage.
In order to checkout a -package, just use git_: +in |project|'s gitlab_ instance. Here, as an example, we assume you want to install and build locally the `bob.blitz` package. In order to check out a package, just use git_: .. code-block:: sh @@ -169,7 +167,6 @@ Create an isolated conda environment ------------------------------------ Now that we have the package checked out we need an isolated environment with proper configuration to develop the package. `bob.devtools` provides a tool that automatically creates such environment. - Before proceeding, you need to make sure that you already have a conda_ environment that has `bob.devtools` installed in. Refer to :ref:`bob.devtools.install` for installation information. Here we assume that you have a conda environment named `bdt` with installed `bob.devtools`. .. code-block:: sh diff --git a/doc/linux.rst b/doc/linux.rst index 434d7a2d81e23e45a3830e7baa62b812dd054309..6cfb281c3c28517ffd38ecbfcbfae6608a41f85b 100644 --- a/doc/linux.rst +++ b/doc/linux.rst @@ -218,46 +218,7 @@ Crontabs # crontab -l MAILTO="" - @reboot /root/docker-cleanup-service.sh - 0 0 * * * /root/docker-cleanup.sh - - -The `docker-cleanup-service.sh` is: - -.. code-block:: sh - - #!/usr/bin/env sh - - # Continuously running image to ensure minimal space is available - - docker run -d \ - -e LOW_FREE_SPACE=30G \ - -e EXPECTED_FREE_SPACE=50G \ - -e LOW_FREE_FILES_COUNT=2097152 \ - -e EXPECTED_FREE_FILES_COUNT=4194304 \ - -e DEFAULT_TTL=60m \ - -e USE_DF=1 \ - --restart always \ - -v /var/run/docker.sock:/var/run/docker.sock \ - --name=gitlab-runner-docker-cleanup \ - quay.io/gitlab/gitlab-runner-docker-cleanup -The `docker-cleanup.sh` is: - -.. code-block:: sh - - #!/usr/bin/env sh - - # Cleans-up docker stuff which is not being used - - # Exited machines which are still dangling - #Caches are containers that we do not want to delete here - #echo "Cleaning exited machines..." - #docker rm -v $(docker ps -a -q -f status=exited) - - # Unused image leafs - echo "Removing unused image leafs..." - docker rmi $(docker images --filter "dangling=true" -q --no-trunc) + 0 12 * * SUN /usr/share/gitlab-runner/clear-docker-cache Conda and shared builds diff --git a/doc/templates.rst b/doc/templates.rst index fb7dfa7e3efb0ea20745a494d6e4427c82d43b91..e4662a0e757b406473f9b4f00e5a2116e1fb1d2f 100644 --- a/doc/templates.rst +++ b/doc/templates.rst @@ -148,6 +148,7 @@ Buildout.cfg in more details This section should include more information about different sections in a buildout.cfg file. + ..
_bob.devtools.anatomy: Anatomy of a new package diff --git a/setup.py b/setup.py index a3d490ec52158faf0f15f483e47af5b5336b4804..1e729138cb3b20b99f87804623628cb94586277e 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ setup( 'dumpsphinx = bob.devtools.scripts.dumpsphinx:dumpsphinx', 'create = bob.devtools.scripts.create:create', 'build = bob.devtools.scripts.build:build', + 'mirror = bob.devtools.scripts.mirror:mirror', 'rebuild = bob.devtools.scripts.rebuild:rebuild', 'test = bob.devtools.scripts.test:test', 'caupdate = bob.devtools.scripts.caupdate:caupdate', @@ -66,6 +67,8 @@ setup( 'jobs = bob.devtools.scripts.jobs:jobs', 'visibility = bob.devtools.scripts.visibility:visibility', 'getpath = bob.devtools.scripts.getpath:getpath', + 'process-pipelines = bob.devtools.scripts.pipelines:process_pipelines', + 'get-pipelines = bob.devtools.scripts.pipelines:get_pipelines', ], 'bdt.ci.cli': [ diff --git a/version.txt b/version.txt index 99742c270bd7687fb6755b9f0534e132858cefe7..ac50b28e52d7b4f0372066aadff4e07a60a753ee 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.6b0 \ No newline at end of file +2.0.2b0 \ No newline at end of file
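Note: mirror.py above dispatches between MD5 and SHA-256 purely on digest length, since repodata.json entries carry either an "md5" (32 hexadecimal characters) or a "sha256" (64 hexadecimal characters) field. The self-contained sketch below restates that convention; verify() is an illustrative name and not part of the module.

.. code-block:: python

   import hashlib

   def verify(path, expected_hash):
       # the digest length selects the algorithm: 32 hex chars means MD5
       algo = hashlib.md5() if len(expected_hash) == 32 else hashlib.sha256()
       with open(path, "rb") as f:
           # hash in 128KB chunks, as the _md5sum()/_sha256sum() helpers
           # above do, to keep memory usage flat for large packages
           for chunk in iter(lambda: f.read(128 * 1024), b""):
               algo.update(chunk)
       return algo.hexdigest() == expected_hash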