diff --git a/src/bob/bio/base/database/utils.py b/src/bob/bio/base/database/utils.py index eddb4663adc75ba30a37c4979052aae139c58eb9..62033b78fe731e6c976cc5433314addc3fde01b3 100644 --- a/src/bob/bio/base/database/utils.py +++ b/src/bob/bio/base/database/utils.py @@ -561,13 +561,20 @@ def download_file( local_file = destination_directory / destination_filename needs_download = True - if not force and local_file.is_file(): - logger.info( - "File %s already exists, skipping download (force=%s).", - local_file, - force, - ) - needs_download = False + if force or not local_file.is_file(): + if not force: + logger.info(f"File {local_file} is not present. Needs download.") + needs_download = True + elif local_file.is_file(): + file_ok = verify_file(local_file, checksum, hash_fct=checksum_fct) + if not file_ok: + logger.info( + f"File {local_file} does not checksum to '{checksum=}'." + ) + needs_download = True + elif not force and checksum is not None and file_ok: + logger.info(f"File {local_file} already exists, skipping download.") + needs_download = False if needs_download: for current_download_try in range(checksum_mismatch_download_attempts): @@ -613,31 +620,19 @@ def download_file( with local_file.open("wb") as f: f.write(response.content) - # Check the created file integrity, re-download if needed - if checksum is None or verify_file( - local_file, checksum, hash_fct=checksum_fct - ): - break # Exit the re-download loop - logger.warning( - "Downloading %s created a file with a wrong checksum. Retry %d", - url, - current_download_try + 1, - ) - if current_download_try >= checksum_mismatch_download_attempts - 1: + if checksum is not None: + if not verify_file(local_file, checksum, hash_fct=checksum_fct): + if not needs_download: + raise ValueError( + f"The local file hash does not correspond to '{checksum}' " + f"and {force=} prevents overwriting." + ) raise ValueError( - "The downloaded file hash " - f"({compute_crc(local_file, hash_fct=checksum_fct)}) for " - f"'{url}' does not correspond to '{checksum}', even after " - f"{checksum_mismatch_download_attempts} retries." + "The downloaded file hash ('" + f"{compute_crc(local_file, hash_fct=checksum_fct)}') does not " + f"correspond to '{checksum}'." ) - elif checksum is not None: - if not verify_file(local_file, checksum, hash_fct=checksum_fct): - raise ValueError( - f"The local file hash does not correspond to '{checksum}' and " - f"{force=} prevents overwriting." - ) - if extract: # Extract only if the file was re-downloaded if needs_download: