From 40f08faf9da12383dec07fd97f771155c91eae8f Mon Sep 17 00:00:00 2001
From: Amir MOHAMMADI <amir.mohammadi@idiap.ch>
Date: Mon, 15 Mar 2021 15:42:03 +0100
Subject: [PATCH] bdt dav upload can add a checksum to the filename on the
 remote. Adds an option (--checksum) that lets bdt automatically augment the
 filename on the remote with the first 8 hex digits of its sha256sum. Fixes #64

---
 bob/devtools/dav.py         | 52 ++++++++++++++++++++++++++++++-------
 bob/devtools/scripts/dav.py | 37 ++++++++++++++++++--------
 2 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/bob/devtools/dav.py b/bob/devtools/dav.py
index 7d30f14d..e01cfd2c 100644
--- a/bob/devtools/dav.py
+++ b/bob/devtools/dav.py
@@ -2,15 +2,17 @@
 # -*- coding: utf-8 -*-
 
 import configparser
+import hashlib
 import os
+import pathlib
 import re
 
 from distutils.version import StrictVersion
 
 import dateutil.parser
 
-from .deploy import _setup_webdav_client
 from .config import read_config
+from .deploy import _setup_webdav_client
 from .log import echo_normal
 from .log import echo_warning
 from .log import get_logger
@@ -26,20 +28,23 @@ def _get_config():
 
     # this does some sort of validation for the "webdav" data...
     if "webdav" in data:
-        if ("server" not in data["webdav"]
-                or "username" not in data["webdav"]
-                or "password" not in data["webdav"]
-                ):
+        if (
+            "server" not in data["webdav"]
+            or "username" not in data["webdav"]
+            or "password" not in data["webdav"]
+        ):
             raise KeyError(
-                f"If the configuration file {k} contains a \"webdav\" " \
-                f"section, it should contain 3 variables defined inside: " \
+                f'If the configuration file {k} contains a "webdav" '
+                f"section, it should contain 3 variables defined inside: "
                 f'"server", "username", "password".'
             )
     else:
         # ask the user for the information, in case nothing available
-        logger.warn("Requesting server information for webDAV operation. " \
-                "(To create a configuration file, and avoid these, follow " \
-                "the Setup subsection at our Installation manual.)")
+        logger.warn(
+            "Requesting server information for webDAV operation. "
+            "(To create a configuration file, and avoid these, follow "
+            "the Setup subsection at our Installation manual.)"
+        )
         webdav_data = dict()
         webdav_data["server"] = input("The base address of the server: ")
         webdav_data["username"] = input("Username: ")
@@ -49,6 +54,33 @@ def _get_config():
     return data["webdav"]
 
 
+def compute_sha256(path):
+    sha256_hash = hashlib.sha256()
+    with open(path, "rb") as f:
+        for byte_block in iter(lambda: f.read(4096), b""):
+            sha256_hash.update(byte_block)
+    file_hash = sha256_hash.hexdigest()
+    return file_hash
+
+
+def augment_path_with_hash(path):
+    """Adds the first 8 digits of sha256sum of a file to its name.
+
+    Example::
+
+        augment_path_with_hash('/datasets/pad-face-replay-attack.tar.gz')
+        '/datasets/pad-face-replay-attack-a8e31cc3.tar.gz'
+    """
+    path = pathlib.Path(path)
+    if not path.is_file():
+        raise ValueError(f"Can only augment path to files with a hash. Got: {path}")
+    file_hash = compute_sha256(path)[:8]
+    suffix = "".join(path.suffixes)
+    base_name = str(path.name)[: -len(suffix) or None]
+    new_path = path.parent / f"{base_name}-{file_hash}{suffix}"
+    return str(new_path)
+
+
 def setup_webdav_client(private):
     """Returns a ready-to-use WebDAV client"""
 
diff --git a/bob/devtools/scripts/dav.py b/bob/devtools/scripts/dav.py
index 26f161d9..a1054daa 100644
--- a/bob/devtools/scripts/dav.py
+++ b/bob/devtools/scripts/dav.py
@@ -8,6 +8,7 @@ import pkg_resources
 
 from click_plugins import with_plugins
 
+from ..dav import augment_path_with_hash
 from ..dav import remove_old_beta_packages
 from ..dav import setup_webdav_client
 from ..log import echo_info
@@ -66,13 +67,14 @@ Examples:
     help="If set, print details about each listed file",
 )
 @click.argument(
-    "path", default="/", required=False,
+    "path",
+    default="/",
+    required=False,
 )
 @verbosity_option()
 @bdt.raise_on_error
 def list(private, long_format, path):
-    """List the contents of a given WebDAV directory.
-    """
+    """List the contents of a given WebDAV directory."""
 
     if not path.startswith("/"):
         path = "/" + path
@@ -105,7 +107,8 @@ Examples:
     help="If set, use the 'private' area instead of the public one",
 )
 @click.argument(
-    "path", required=True,
+    "path",
+    required=True,
 )
 @verbosity_option()
 @bdt.raise_on_error
@@ -165,7 +168,8 @@ Examples:
     help="If this flag is set, then execute the removal",
 )
 @click.argument(
-    "path", required=True,
+    "path",
+    required=True,
 )
 @verbosity_option()
 @bdt.raise_on_error
@@ -199,12 +203,12 @@ Examples:
 
   1. Uploads a single file to a specific location:
 
-     $ bdt dav -vv copy local/file remote
+     $ bdt dav upload -vv --checksum local/file remote
 
 
   2. Uploads various resources at once:
 
-     $ bdt dav -vv copy local/file1 local/dir local/file2 remote
+     $ bdt dav upload -vv --checksum local/file1 local/dir local/file2 remote
 
 """
 )
@@ -220,6 +224,12 @@ Examples:
     default=False,
     help="If this flag is set, then execute the removal",
 )
+@click.option(
+    "-c",
+    "--checksum/--no-checksum",
+    default=False,
+    help="If set, will augment the filename(s) on the server with 8 first characters of their sha256 checksum.",
+)
 @click.argument(
     "local",
     required=True,
@@ -227,11 +237,12 @@ Examples:
     nargs=-1,
 )
 @click.argument(
-    "remote", required=True,
+    "remote",
+    required=True,
 )
 @verbosity_option()
 @bdt.raise_on_error
-def upload(private, execute, local, remote):
+def upload(private, execute, checksum, local, remote):
     """Uploads a local resource (file or directory) to a remote destination
 
     If the local resource is a directory, it is uploaded recursively.  If the
@@ -258,7 +269,10 @@ def upload(private, execute, local, remote):
         return 1
 
     for k in local:
-        actual_remote = remote + os.path.basename(k)
+        path_with_hash = k
+        if checksum:
+            path_with_hash = augment_path_with_hash(k)
+        actual_remote = remote + os.path.basename(path_with_hash)
         remote_path = cl.get_url(actual_remote)
 
         if cl.check(actual_remote):
@@ -308,7 +322,8 @@ Examples:
     help="If this flag is set, then execute the removal",
 )
 @click.argument(
-    "path", required=True,
+    "path",
+    required=True,
 )
 @verbosity_option()
 @bdt.raise_on_error
-- 
GitLab