From 0436c422681dda74fe163b5bb40ef9edba96d52f Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Tue, 22 Oct 2019 22:19:32 +0200
Subject: [PATCH] [mirror] Implement download retries with random pauses

---
 bob/devtools/mirror.py | 63 ++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/bob/devtools/mirror.py b/bob/devtools/mirror.py
index 0a0c4c7a..e939b9d8 100644
--- a/bob/devtools/mirror.py
+++ b/bob/devtools/mirror.py
@@ -11,6 +11,8 @@ https://github.com/valassis-digital-media/conda-mirror
 import os
 import bz2
 import json
+import time
+import random
 import hashlib
 import fnmatch
 import tempfile
@@ -207,30 +209,49 @@ def download_packages(packages, repodata, channel_url, dest_dir, arch, dry_run):
             temp_dest = os.path.join(download_dir, p)
             logger.info('[download: %d/%d] %s -> %s', k, total, url, temp_dest)
 
-            if not dry_run:
-                logger.debug('[checking: %d/%d] %s', k, total, url)
-                r = requests.get(url, stream=True, allow_redirects=True)
-                logger.info('[download: %d/%d] %s -> %s (%s bytes)', k, total,
-                        url, temp_dest, r.headers['Content-length'])
-                open(temp_dest, 'wb').write(r.raw.read())
-
-            # verify that checksum matches
-            if len(expected_hash) == 32:  #md5
-                logger.info('[verify: %d/%d] md5(%s) == %s?', k, total,
-                        temp_dest, expected_hash)
-            else:  #sha256
-                logger.info('[verify: %d/%d] sha256(%s) == %s?', k, total,
-                        temp_dest, expected_hash)
+            package_retries = 10
+            while package_retries:
 
-            if not dry_run:
+                if not dry_run:
+                    logger.debug('[checking: %d/%d] %s', k, total, url)
+                    r = requests.get(url, stream=True, allow_redirects=True)
+                    logger.info('[download: %d/%d] %s -> %s (%s bytes)', k,
+                            total, url, temp_dest, r.headers['Content-length'])
+                    open(temp_dest, 'wb').write(r.raw.read())
+
+                # verify that checksum matches
                 if len(expected_hash) == 32:  #md5
-                    actual_hash = _md5sum(temp_dest)
+                    logger.info('[verify: %d/%d] md5(%s) == %s?', k, total,
+                            temp_dest, expected_hash)
                 else:  #sha256
-                    actual_hash = _sha256sum(temp_dest)
-                assert actual_hash == expected_hash, 'Checksum of locally' \
-                        ' downloaded version of %s does not match ' \
-                        '(actual:%r != %r:expected)' % (url, actual_hash,
-                                expected_hash)
+                    logger.info('[verify: %d/%d] sha256(%s) == %s?', k, total,
+                            temp_dest, expected_hash)
+
+                if not dry_run:
+                    if len(expected_hash) == 32:  #md5
+                        actual_hash = _md5sum(temp_dest)
+                    else:  #sha256
+                        actual_hash = _sha256sum(temp_dest)
+
+                    if actual_hash != expected_hash:
+                        wait_time = random.randint(10,61)
+                        logger.warning('Checksum of locally downloaded ' \
+                                ' version of %s does not match ' \
+                                '(actual:%r != %r:expected) - retrying ' \
+                                'after %d seconds', url, actual_hash,
+                                    expected_hash, wait_time)
+                        os.unlink(temp_dest)
+                        time.sleep(wait_time)
+                        package_retries -= 1
+                        continue
+                # checksum verified, or dry-run (nothing to verify): stop
+                break
+
+            # final check: fails if every retry mismatched (n/a on dry-run)
+            assert dry_run or actual_hash == expected_hash, 'Checksum of ' \
+                    'locally downloaded version of %s does not match ' \
+                    '(actual:%r != %r:expected)' % (url, actual_hash,
+                            expected_hash)
 
             # move
             local_dest = os.path.join(dest_dir, arch, p)
-- 
GitLab