From f21f2ecea46197a90e36976c6f1e087946e51e05 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Wed, 2 Jun 2021 13:46:58 +0200 Subject: [PATCH] [scripts.mirror] Add option to filter packages by date --- bob/devtools/scripts/mirror.py | 80 ++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/bob/devtools/scripts/mirror.py b/bob/devtools/scripts/mirror.py index f8ac0b00..0635143b 100644 --- a/bob/devtools/scripts/mirror.py +++ b/bob/devtools/scripts/mirror.py @@ -2,6 +2,7 @@ # vim: set fileencoding=utf-8 : +import datetime import os import tempfile @@ -33,7 +34,7 @@ Examples: 1. Mirrors a conda channel: \b - $ bdt mirror -vv https://www.idiap.ch/software/bob/label/beta + $ bdt mirror -vv https://www.idiap.ch/software/bob/conda/label/beta mirror """ ) @@ -57,7 +58,11 @@ Examples: "-b", "--blacklist", type=click.Path( - exists=True, dir_okay=False, file_okay=True, readable=True, resolve_path=True + exists=True, + dir_okay=False, + file_okay=True, + readable=True, + resolve_path=True, ), help="A file containing a list of globs to exclude from local " "mirroring, one per line", @@ -66,7 +71,11 @@ Examples: "-w", "--whitelist", type=click.Path( - exists=True, dir_okay=False, file_okay=True, readable=True, resolve_path=True + exists=True, + dir_okay=False, + file_okay=True, + readable=True, + resolve_path=True, ), help="A file containing a list of globs to include at local " "mirroring, one per line. This is considered *after* " @@ -113,9 +122,18 @@ Examples: default=False, help="If set, then packages that are supposed to be kept locally " "will be checksummed against the remote repository repodata.json " - "expections. Errors will be reported and packages will be " + "expectations. Errors will be reported and packages will be " "removed from the local repository", ) +@click.option( + "-s", + "--start-date", + type=click.DateTime(formats=["%Y-%m-%d"]), + help="To filter packages to mirror by date, supply a starting date " + "in isoformat (yyyy-mm-dd, e.g. 2019-03-30). If you do so, then only " + "packages that were recorded at the date or later will be downloaded " + "to your mirror", +) @verbosity_option() @bdt.raise_on_error def mirror( @@ -128,6 +146,7 @@ def mirror( tmpdir, patch, checksum, + start_date, ): """Mirrors a conda channel to a particular local destination @@ -158,23 +177,28 @@ def mirror( logger.info("Creating conda channel at %s...", dest_dir) if not dry_run: conda_build.api.update_index( - [dest_dir], subdir=DEFAULT_SUBDIRS, progress=False, verbose=False + [dest_dir], + subdir=DEFAULT_SUBDIRS, + progress=False, + verbose=False, ) + start_date = start_date.date() # only interested on the day itself + for arch in DEFAULT_SUBDIRS: remote_repodata = get_json(channel_url, arch, "repodata_from_packages.json.bz2") - logger.info( - "%d packages available in remote index", - len(remote_repodata.get("packages", {})), - ) + + # merge all available packages into one single dictionary + remote_package_info = {} + remote_package_info.update(remote_repodata.get("packages", {})) + remote_package_info.update(remote_repodata.get("packages.conda", {})) + + logger.info("%d packages available in remote index", len(remote_package_info)) local_packages = get_local_contents(dest_dir, arch) logger.info("%d packages available in local mirror", len(local_packages)) - remote_packages = set( - list(remote_repodata.get("packages", {}).keys()) - + list(remote_repodata.get("packages.conda", {}).keys()) - ) + remote_packages = set(remote_package_info.keys()) if blacklist is not None and os.path.exists(blacklist): globs_to_remove = set(load_glob_list(blacklist)) @@ -191,6 +215,29 @@ def mirror( globs_to_consider = set(load_glob_list(whitelist)) to_download = whitelist_filter(to_download, globs_to_consider) + # if the user passed a cut date, only download packages that are newer + # or at the same date than the proposed date + if start_date is not None: + too_old = set() + for k in to_download: + pkgdate = datetime.datetime.fromtimestamp( + remote_package_info[k]["timestamp"] / 1000.0 + ).date() + if pkgdate < start_date: + logger.debug( + "Package %s, from %s is older than %s, not downloading", + k, + pkgdate.isoformat(), + start_date.isoformat(), + ) + too_old.add(k) + logger.info( + "Filtering out %d older packages from index (older than %s)", + len(too_old), + start_date.isoformat(), + ) + to_download -= too_old + # in the local packages, subset those that we no longer need, be it # because they have been removed from the remote repository, or because # we decided to blacklist them. @@ -215,7 +262,12 @@ def mirror( if to_download: download_packages( - to_download, remote_repodata, channel_url, dest_dir, arch, dry_run + to_download, + remote_repodata, + channel_url, + dest_dir, + arch, + dry_run, ) else: echo_info( -- GitLab