......@@ -48,8 +48,8 @@ def _download(url, target_directory):
download_filename = os.path.join(target_directory, target_filename)
with open(download_filename, 'w+b') as tf:
ret = requests.get(url, stream=True)
size = ret.headers.get('Content-length', '??')
logger.debug('Saving to %s (%s bytes)', download_filename, size)
for data in ret.iter_content(chunk_size):
file_size = os.path.getsize(download_filename)
......@@ -96,16 +96,17 @@ def get_json(channel, platform, name):
url = channel + '/' + platform + '/' + name
logger.debug('[checking] %s...', url)
size = r.headers.get('Content-length', '??')'[download] %s (%s bytes)...', url, size)
if name.endswith('.bz2'):
# just in case transport encoding was applied
r.raw.decode_content = True
data = bz2.decompress(
data =
return json.loads(data)
# else, just decodes the response
return r.json()
def get_local_contents(path, arch):
......@@ -141,6 +142,15 @@ def blacklist_filter(packages, globs):
return packages - to_remove
def whitelist_filter(packages, globs):
    """Keeps only the packages matching at least one of the globs

    Every pattern in ``globs`` is matched against ``packages`` using
    :py:func:`fnmatch.filter`.  The union of all matches is returned as a
    new :py:class:`set`; an empty ``globs`` therefore selects nothing.
    """

    matches_per_glob = [fnmatch.filter(packages, pattern) for pattern in globs]
    return set().union(*matches_per_glob)
def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
"""Downloads remote packages to a download directory
......@@ -215,8 +225,9 @@ def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
if not dry_run:
logger.debug('[checking: %d/%d] %s', k, total, url)
r = requests.get(url, stream=True, allow_redirects=True)
size = r.headers.get('Content-length', '??')'[download: %d/%d] %s -> %s (%s bytes)', k,
total, url, temp_dest, size)
open(temp_dest, 'wb').write(
# verify that checksum matches
......@@ -279,3 +290,46 @@ def remove_packages(packages, dest_dir, arch, dry_run):'[remove: %d/%d] %s', k, total, path)
if not dry_run:
def _cleanup_json(data, packages):
"""Cleans-up the contents of conda JSON looking at existing packages"""
# only keys to clean-up here, othere keys remain unchanged
for key in ('packages', 'packages.conda'):
if key not in data: continue
data[key] = dict((k,v) for k,v in data[key].items() if k in packages)
return data
def _save_json(data, dest_dir, arch, name):
"""Saves contents of conda JSON"""
destfile = os.path.join(dest_dir, arch, name)
with open(destfile, 'w') as outfile:
json.dump(data, outfile, ensure_ascii=True, indent=2)
return destfile
def copy_and_clean_json(url, dest_dir, arch, name):
    """Fetches a conda JSON file, prunes it and stores it in the mirror

    The file ``name`` is retrieved through :py:func:`get_json` for the given
    channel ``url`` and ``arch``, pruned by :py:func:`_cleanup_json` against
    what :py:func:`get_local_contents` reports for ``dest_dir``/``arch``, and
    saved by :py:func:`_save_json`.  Returns the path of the saved file.
    """

    remote = get_json(url, arch, name)
    available = get_local_contents(dest_dir, arch)
    return _save_json(_cleanup_json(remote, available), dest_dir, arch, name)
def copy_and_clean_patch(url, dest_dir, arch, name):
    """Copies and cleans a conda patch instructions JSON file

    The file ``name`` is retrieved through :py:func:`get_json` for the given
    channel ``url`` and ``arch``.  Besides the pruning done by
    :py:func:`_cleanup_json`, the ``remove`` and ``revoke`` lists are
    filtered so they only mention entries reported by
    :py:func:`get_local_contents` for ``dest_dir``/``arch``.  The result is
    written by :py:func:`_save_json`, whose return value (the path of the
    saved file) is returned.
    """

    data = get_json(url, arch, name)
    packages = get_local_contents(dest_dir, arch)
    data = _cleanup_json(data, packages)

    # cleanup specific patch_instructions.json fields; as in _cleanup_json,
    # a key that is absent from the remote file is left absent instead of
    # raising a KeyError
    for key in ("remove", "revoke"):
        if key in data:
            data[key] = [k for k in data[key] if k in packages]

    return _save_json(data, dest_dir, arch, name)
......@@ -14,8 +14,10 @@ from ..mirror import (
from ..log import verbosity_option, get_logger, echo_info, echo_warning
......@@ -51,6 +53,15 @@ Examples:
help="A file containing a list of globs to exclude from local " \
"mirroring, one per line",
type=click.Path(exists=True, dir_okay=False, file_okay=True,
readable=True, resolve_path=True),
help="A file containing a list of globs to include at local " \
"mirroring, one per line. This is considered *after* " \
"the blacklisting. It is here just for testing purposes",
......@@ -72,15 +83,25 @@ Examples:
readable=True, writable=True, resolve_path=True),
help="A directory where to store temporary files",
help="If set, then consider we are mirroring the defaults channel "
"where a patch_instructions.json exists and must be downloaded and "
"prunned so the mirror works adequately",
def mirror(
"""Mirrors a conda channel to a particular local destination
......@@ -133,6 +154,10 @@ def mirror(
to_download = blacklist_filter(remote_packages - local_packages,
if whitelist is not None and os.path.exists(whitelist):
globs_to_consider = set(load_glob_list(whitelist))
to_download = whitelist_filter(to_download, globs_to_consider)
# in the local packages, subset those that we no longer need, be it
# because they have been removed from the remote repository, or because
# we decided to blacklist them.
......@@ -157,6 +182,16 @@ def mirror(
echo_info("Mirror at %s/%s is up-to-date w.r.t. blacklist. " \
"No packages to be removed." % (dest_dir, arch))
if patch:
# download/cleanup patch instructions, otherwise conda installs may
# go crazy. Do this before the indexing, that will use that file
# to do its magic.
patch_file = 'patch_instructions.json'
name = copy_and_clean_patch(channel_url, dest_dir, arch,
echo_info("Cleaned copy of %s/%s/%s installed at %s" %
(channel_url, arch, patch_file, name))
# re-indexes the channel to produce a conda-compatible setup
echo_info("Re-indexing %s..." % dest_dir)
if not dry_run:
