Commit 0d05b38c authored by André Anjos

[mirror] Implement thorough optional checksum in existing mirrored packages (closes #42)

parent 4fa7f40f
Pipeline #34678 passed with stages
in 3 minutes and 44 seconds
......@@ -151,6 +151,30 @@ def whitelist_filter(packages, globs):
return to_keep
def _hashsum(filename, hasher):
    """Feeds the contents of a file to a hash object, returns its hex digest

    The file is opened unbuffered and read into a single pre-allocated
    128 KiB buffer that is reused for every chunk, so memory usage does not
    depend on the size of the file being hashed.


    Parameters
    ----------

    filename : str
        Path to the file to be hashed

    hasher : object
        A freshly created :py:mod:`hashlib` hash object (e.g.
        ``hashlib.sha256()``), which is updated in place


    Returns
    -------

    digest : str
        Hexadecimal digest of the file contents

    """

    buf = bytearray(128 * 1024)
    view = memoryview(buf)
    with open(filename, "rb", buffering=0) as f:
        # readinto() returns the number of bytes read; 0 is the sentinel that
        # stops the iteration.  Slicing the memoryview does not copy data.
        for n in iter(lambda: f.readinto(view), 0):
            hasher.update(view[:n])
    return hasher.hexdigest()


def _sha256sum(filename):
    """Calculates and returns the sha-256 sum given a file name"""

    return _hashsum(filename, hashlib.sha256())


def _md5sum(filename):
    """Calculates and returns the md5 sum given a file name"""

    return _hashsum(filename, hashlib.md5())
def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
"""Downloads remote packages to a download directory
......@@ -177,26 +201,6 @@ def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
"""
def _sha256sum(filename):
    """Returns the hexadecimal sha-256 digest of the file at ``filename``"""

    digest = hashlib.sha256()
    # one reusable 128 KiB scratch buffer, exposed through a memoryview so
    # slices handed to the hash object do not copy data
    chunk = memoryview(bytearray(128 * 1024))
    with open(filename, "rb", buffering=0) as stream:
        while True:
            count = stream.readinto(chunk)
            if count == 0:
                break
            digest.update(chunk[:count])
    return digest.hexdigest()
def _md5sum(filename):
    """Returns the hexadecimal md5 digest of the file at ``filename``"""

    digest = hashlib.md5()
    # one reusable 128 KiB scratch buffer, exposed through a memoryview so
    # slices handed to the hash object do not copy data
    chunk = memoryview(bytearray(128 * 1024))
    with open(filename, "rb", buffering=0) as stream:
        while True:
            count = stream.readinto(chunk)
            if count == 0:
                break
            digest.update(chunk[:count])
    return digest.hexdigest()
# download files into temporary directory, that is removed by the end of
# the procedure, or if something bad occurs
with tempfile.TemporaryDirectory() as download_dir:
......@@ -319,3 +323,57 @@ def copy_and_clean_json(url, dest_dir, arch, name):
packages = get_local_contents(dest_dir, arch)
data = _cleanup_json(data, packages)
return _save_json(data, dest_dir, arch, name)
def checksum(repodata, basepath, packages):
    """Checksums packages on the local mirror and compares to remote repository

    For each package, the expected hash is taken from the remote repodata
    (``sha256`` if available, otherwise ``md5``) and compared against the
    hash computed from the local file.  Mismatches are logged as warnings.


    Parameters
    ----------

    repodata : dict
        Data loaded from `repodata.json` on the remote repository

    basepath : str
        Path leading to the packages in the package list

    packages : list
        List of packages that are available locally, by name


    Returns
    -------

    issues : list
        Names of the packages whose local checksum does not match the value
        announced by the remote repository


    Raises
    ------

    KeyError
        If a package is not described in ``repodata``, or its description
        carries neither a ``sha256`` nor an ``md5`` entry

    """

    issues = []
    total = len(packages)

    # progress is counted from 1, so that the last entry reads "total/total"
    for k, p in enumerate(packages, 1):

        path_to_package = os.path.join(basepath, p)

        # ``.tar.bz2`` packages are listed under "packages", everything else
        # is looked up under "packages.conda"
        section = "packages" if p.endswith(".tar.bz2") else "packages.conda"
        info = repodata[section][p]

        # checksum to verify: md5 is only looked up when sha256 is missing,
        # so records carrying just a sha256 entry do not raise a KeyError
        expected_hash = info.get("sha256")
        if expected_hash is None:
            expected_hash = info["md5"]

        # an hexadecimal md5 digest has 32 characters, anything else is
        # treated as sha256
        if len(expected_hash) == 32:
            algorithm, hasher = "md5", _md5sum
        else:
            algorithm, hasher = "sha256", _sha256sum

        # verify that checksum matches
        logger.debug(
            "[verify: %d/%d] %s(%s) == %s?",
            k,
            total,
            algorithm,
            path_to_package,
            expected_hash,
        )
        actual_hash = hasher(path_to_package)

        if actual_hash != expected_hash:
            logger.warning(
                "Checksum of %s does not match remote "
                "repository description (actual:%r != %r:expected)",
                path_to_package,
                actual_hash,
                expected_hash,
            )
            issues.append(p)

    return issues
......@@ -18,6 +18,7 @@ from ..mirror import (
download_packages,
remove_packages,
copy_and_clean_json,
checksum,
)
from ..log import verbosity_option, get_logger, echo_info, echo_warning
......@@ -91,6 +92,15 @@ Examples:
"where a patch_instructions.json exists and must be downloaded and "
"prunned so the mirror works adequately",
)
@click.option(
"-c",
"--checksum/--no-checksum",
default=False,
help="If set, then packages that are supposed to be kept locally "
"will be checksummed against the remote repository repodata.json "
"expections. Errors will be reported and packages will be "
"removed from the local repository",
)
@verbosity_option()
@bdt.raise_on_error
def mirror(
......@@ -166,6 +176,14 @@ def mirror(
to_delete_locally = (local_packages - to_keep) | disappeared_remotely
# execute the transaction
if checksum:
    # double-check if, among packages I should keep, everything looks
    # alright with respect to expected checksums from the remote repo
    #
    # NOTE(review): the ``--checksum/--no-checksum`` option is delivered by
    # click as a keyword argument named ``checksum``.  Assuming the command
    # signature (not visible here) uses that name, it shadows the function of
    # the same name imported at module level, and calling it would raise a
    # ``TypeError`` (bool is not callable).  The function is therefore
    # re-imported locally under an unambiguous alias.
    from ..mirror import checksum as _checksum_packages

    issues = _checksum_packages(
        remote_repodata, os.path.join(dest_dir, arch), to_keep
    )
    remove_packages(issues, dest_dir, arch, dry_run)

    # ``issues`` is a list: the in-place set union only accepts another set,
    # so convert before scheduling the faulty packages for a new download
    to_download |= set(issues)
if to_download:
download_packages(to_download, remote_repodata, channel_url, dest_dir,
arch, dry_run)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment