Skip to content
Snippets Groups Projects
Commit 3a065d15 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[mirror] Implement patch and whitelist support

parent 82c5897d
No related branches found
No related tags found
No related merge requests found
Pipeline #34665 passed
...@@ -48,8 +48,8 @@ def _download(url, target_directory): ...@@ -48,8 +48,8 @@ def _download(url, target_directory):
download_filename = os.path.join(target_directory, target_filename) download_filename = os.path.join(target_directory, target_filename)
with open(download_filename, 'w+b') as tf: with open(download_filename, 'w+b') as tf:
ret = requests.get(url, stream=True) ret = requests.get(url, stream=True)
logger.debug('Saving to %s (%s bytes)', download_filename, size = ret.headers.get('Content-length', '??')
ret.headers['Content-length']) logger.debug('Saving to %s (%s bytes)', download_filename, size)
for data in ret.iter_content(chunk_size): for data in ret.iter_content(chunk_size):
tf.write(data) tf.write(data)
file_size = os.path.getsize(download_filename) file_size = os.path.getsize(download_filename)
...@@ -96,16 +96,17 @@ def get_json(channel, platform, name): ...@@ -96,16 +96,17 @@ def get_json(channel, platform, name):
url = channel + '/' + platform + '/' + name url = channel + '/' + platform + '/' + name
logger.debug('[checking] %s...', url) logger.debug('[checking] %s...', url)
r = requests.get(url, allow_redirects=True, stream=True) r = requests.get(url, allow_redirects=True, stream=True)
logger.info('[download] %s (%s bytes)...', url, r.headers['Content-length']) size = r.headers.get('Content-length', '??')
logger.info('[download] %s (%s bytes)...', url, size)
if name.endswith('.bz2'): if name.endswith('.bz2'):
# just in case transport encoding was applied # just in case transport encoding was applied
r.raw.decode_content = True r.raw.decode_content = True
data = bz2.decompress(r.raw.read()) data = bz2.decompress(r.raw.read())
else: return json.loads(data)
data = r.read()
return json.loads(data) # else, just decodes the response
return r.json()
def get_local_contents(path, arch): def get_local_contents(path, arch):
...@@ -141,6 +142,15 @@ def blacklist_filter(packages, globs): ...@@ -141,6 +142,15 @@ def blacklist_filter(packages, globs):
return packages - to_remove return packages - to_remove
def whitelist_filter(packages, globs):
    """Filters **in** the input package set with the glob list

    Every entry of ``globs`` is applied to ``packages`` through
    :py:func:`fnmatch.filter`; the result is the union of all matches.

    Parameters:

      packages: iterable of package names to be selected from
      globs: iterable of glob patterns; a name is kept if it matches at
        least one of them

    Returns:

      set: a new set with the matching names (empty if ``globs`` is
      empty or nothing matches).  ``packages`` itself is not modified.
    """
    return set().union(
        *(fnmatch.filter(packages, pattern) for pattern in globs)
    )
def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run): def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
"""Downloads remote packages to a download directory """Downloads remote packages to a download directory
...@@ -215,8 +225,9 @@ def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run): ...@@ -215,8 +225,9 @@ def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
if not dry_run: if not dry_run:
logger.debug('[checking: %d/%d] %s', k, total, url) logger.debug('[checking: %d/%d] %s', k, total, url)
r = requests.get(url, stream=True, allow_redirects=True) r = requests.get(url, stream=True, allow_redirects=True)
size = r.headers.get('Content-length', '??')
logger.info('[download: %d/%d] %s -> %s (%s bytes)', k, logger.info('[download: %d/%d] %s -> %s (%s bytes)', k,
total, url, temp_dest, r.headers['Content-length']) total, url, temp_dest, size)
open(temp_dest, 'wb').write(r.raw.read()) open(temp_dest, 'wb').write(r.raw.read())
# verify that checksum matches # verify that checksum matches
...@@ -279,3 +290,46 @@ def remove_packages(packages, dest_dir, arch, dry_run): ...@@ -279,3 +290,46 @@ def remove_packages(packages, dest_dir, arch, dry_run):
logger.info('[remove: %d/%d] %s', k, total, path) logger.info('[remove: %d/%d] %s', k, total, path)
if not dry_run: if not dry_run:
os.unlink(path) os.unlink(path)
def _cleanup_json(data, packages):
"""Cleans-up the contents of conda JSON looking at existing packages"""
# only keys to clean-up here, othere keys remain unchanged
for key in ('packages', 'packages.conda'):
if key not in data: continue
data[key] = dict((k,v) for k,v in data[key].items() if k in packages)
return data
def _save_json(data, dest_dir, arch, name):
"""Saves contents of conda JSON"""
destfile = os.path.join(dest_dir, arch, name)
with open(destfile, 'w') as outfile:
json.dump(data, outfile, ensure_ascii=True, indent=2)
return destfile
def copy_and_clean_json(url, dest_dir, arch, name):
    """Fetches a conda JSON file, prunes it and stores it locally

    The file is retrieved with :py:func:`get_json`, its package sections
    are reduced by :py:func:`_cleanup_json` to whatever
    :py:func:`get_local_contents` reports for ``dest_dir``/``arch``, and
    the result is written by :py:func:`_save_json`.

    Parameters:

      url: channel URL, passed as the ``channel`` argument of ``get_json``
      dest_dir: base directory of the local mirror
      arch: platform sub-directory, used both remotely and locally
      name: name of the JSON file to copy

    Returns:

      the value returned by ``_save_json`` (the path of the written file)
    """
    remote = get_json(url, arch, name)
    available = get_local_contents(dest_dir, arch)
    return _save_json(_cleanup_json(remote, available), dest_dir, arch, name)
def copy_and_clean_patch(url, dest_dir, arch, name):
    """Copies and cleans a conda patch-instructions JSON file

    The file is retrieved with :py:func:`get_json` and pruned against what
    :py:func:`get_local_contents` reports for ``dest_dir``/``arch``: first
    the package sections (via :py:func:`_cleanup_json`), then the
    patch-specific ``remove`` and ``revoke`` lists.  The result is written
    by :py:func:`_save_json`.

    Parameters:

      url: channel URL, passed as the ``channel`` argument of ``get_json``
      dest_dir: base directory of the local mirror
      arch: platform sub-directory, used both remotely and locally
      name: name of the patch file to copy

    Returns:

      the value returned by ``_save_json`` (the path of the written file)
    """
    data = get_json(url, arch, name)
    packages = get_local_contents(dest_dir, arch)
    data = _cleanup_json(data, packages)

    # cleanup specific patch_instructions.json fields.  A key absent from
    # the remote file is skipped (as _cleanup_json does for its own keys)
    # rather than raising KeyError.
    for key in ("remove", "revoke"):
        if key not in data:
            continue
        data[key] = [k for k in data[key] if k in packages]

    return _save_json(data, dest_dir, arch, name)
...@@ -14,8 +14,10 @@ from ..mirror import ( ...@@ -14,8 +14,10 @@ from ..mirror import (
get_local_contents, get_local_contents,
load_glob_list, load_glob_list,
blacklist_filter, blacklist_filter,
whitelist_filter,
download_packages, download_packages,
remove_packages, remove_packages,
copy_and_clean_patch,
) )
from ..log import verbosity_option, get_logger, echo_info, echo_warning from ..log import verbosity_option, get_logger, echo_info, echo_warning
...@@ -51,6 +53,15 @@ Examples: ...@@ -51,6 +53,15 @@ Examples:
help="A file containing a list of globs to exclude from local " \ help="A file containing a list of globs to exclude from local " \
"mirroring, one per line", "mirroring, one per line",
) )
@click.option(
"-w",
"--whitelist",
type=click.Path(exists=True, dir_okay=False, file_okay=True,
readable=True, resolve_path=True),
help="A file containing a list of globs to include at local " \
"mirroring, one per line. This is considered *after* " \
"the blacklisting. It is here just for testing purposes",
)
@click.option( @click.option(
"-m", "-m",
"--check-md5/--no-check-md5", "--check-md5/--no-check-md5",
...@@ -72,15 +83,25 @@ Examples: ...@@ -72,15 +83,25 @@ Examples:
readable=True, writable=True, resolve_path=True), readable=True, writable=True, resolve_path=True),
help="A directory where to store temporary files", help="A directory where to store temporary files",
) )
@click.option(
"-p",
"--patch/--no-patch",
default=False,
help="If set, then consider we are mirroring the defaults channel "
"where a patch_instructions.json exists and must be downloaded and "
"prunned so the mirror works adequately",
)
@verbosity_option() @verbosity_option()
@bdt.raise_on_error @bdt.raise_on_error
def mirror( def mirror(
channel_url, channel_url,
dest_dir, dest_dir,
blacklist, blacklist,
whitelist,
check_md5, check_md5,
dry_run, dry_run,
tmpdir, tmpdir,
patch,
): ):
"""Mirrors a conda channel to a particular local destination """Mirrors a conda channel to a particular local destination
...@@ -133,6 +154,10 @@ def mirror( ...@@ -133,6 +154,10 @@ def mirror(
to_download = blacklist_filter(remote_packages - local_packages, to_download = blacklist_filter(remote_packages - local_packages,
globs_to_remove) globs_to_remove)
if whitelist is not None and os.path.exists(whitelist):
globs_to_consider = set(load_glob_list(whitelist))
to_download = whitelist_filter(to_download, globs_to_consider)
# in the local packages, subset those that we no longer need, be it # in the local packages, subset those that we no longer need, be it
# because they have been removed from the remote repository, or because # because they have been removed from the remote repository, or because
# we decided to blacklist them. # we decided to blacklist them.
...@@ -157,6 +182,16 @@ def mirror( ...@@ -157,6 +182,16 @@ def mirror(
echo_info("Mirror at %s/%s is up-to-date w.r.t. blacklist. " \ echo_info("Mirror at %s/%s is up-to-date w.r.t. blacklist. " \
"No packages to be removed." % (dest_dir, arch)) "No packages to be removed." % (dest_dir, arch))
if patch:
# download/cleanup patch instructions, otherwise conda installs may
# go crazy. Do this before the indexing, that will use that file
# to do its magic.
patch_file = 'patch_instructions.json'
name = copy_and_clean_patch(channel_url, dest_dir, arch,
patch_file)
echo_info("Cleaned copy of %s/%s/%s installed at %s" %
(channel_url, arch, patch_file, name))
# re-indexes the channel to produce a conda-compatible setup # re-indexes the channel to produce a conda-compatible setup
echo_info("Re-indexing %s..." % dest_dir) echo_info("Re-indexing %s..." % dest_dir)
if not dry_run: if not dry_run:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment