From 5b856408451642c6278b461744ad1fbe21ace97f Mon Sep 17 00:00:00 2001
From: Amir MOHAMMADI <amir.mohammadi@idiap.ch>
Date: Tue, 2 Mar 2021 13:09:23 +0100
Subject: [PATCH] [download] rename list_folders to list_dir; add documentation
 and tests

---
 MANIFEST.in                                   |   1 +
 .../data/test_list_folders/README.rst         |   0
 .../test_list_folders/database1/README1.rst   |   0
 .../database1/protocol1/dev.csv               |   0
 .../database1/protocol1/train.csv             |   0
 .../database1/protocol2/dev.csv               |   0
 .../database1/protocol2/eval.csv              |   0
 .../test_list_folders/database2/README2.rst   |   0
 .../database2/protocol2/dev.csv               |   0
 .../database2/protocol2/eval.csv              |   0
 .../database2/protocol3/dev.csv               |   0
 .../database2/protocol3/train.csv             |   0
 bob/extension/data/test_list_folders1.tar.gz  | Bin 0 -> 396 bytes
 bob/extension/data/test_list_folders2.tar.gz  | Bin 0 -> 377 bytes
 bob/extension/download.py                     |  55 +++++++++++++++---
 bob/extension/test_download.py                |  52 +++++++++++++++--
 doc/py_api.rst                                |   3 +
 17 files changed, 96 insertions(+), 15 deletions(-)
 create mode 100644 bob/extension/data/test_list_folders/README.rst
 create mode 100644 bob/extension/data/test_list_folders/database1/README1.rst
 create mode 100644 bob/extension/data/test_list_folders/database1/protocol1/dev.csv
 create mode 100644 bob/extension/data/test_list_folders/database1/protocol1/train.csv
 create mode 100644 bob/extension/data/test_list_folders/database1/protocol2/dev.csv
 create mode 100644 bob/extension/data/test_list_folders/database1/protocol2/eval.csv
 create mode 100644 bob/extension/data/test_list_folders/database2/README2.rst
 create mode 100644 bob/extension/data/test_list_folders/database2/protocol2/dev.csv
 create mode 100644 bob/extension/data/test_list_folders/database2/protocol2/eval.csv
 create mode 100644 bob/extension/data/test_list_folders/database2/protocol3/dev.csv
 create mode 100644 bob/extension/data/test_list_folders/database2/protocol3/train.csv
 create mode 100644 bob/extension/data/test_list_folders1.tar.gz
 create mode 100644 bob/extension/data/test_list_folders2.tar.gz

diff --git a/MANIFEST.in b/MANIFEST.in
index fd1d31f..50784c5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,4 @@ include LICENSE README.rst buildout.cfg version.txt
 recursive-include doc conf.py *.rst
 recursive-include bob *.cpp *.h
 recursive-include bob/extension/data *
+global-exclude *.py[cod]
diff --git a/bob/extension/data/test_list_folders/README.rst b/bob/extension/data/test_list_folders/README.rst
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database1/README1.rst b/bob/extension/data/test_list_folders/database1/README1.rst
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database1/protocol1/dev.csv b/bob/extension/data/test_list_folders/database1/protocol1/dev.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database1/protocol1/train.csv b/bob/extension/data/test_list_folders/database1/protocol1/train.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database1/protocol2/dev.csv b/bob/extension/data/test_list_folders/database1/protocol2/dev.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database1/protocol2/eval.csv b/bob/extension/data/test_list_folders/database1/protocol2/eval.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database2/README2.rst b/bob/extension/data/test_list_folders/database2/README2.rst
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database2/protocol2/dev.csv b/bob/extension/data/test_list_folders/database2/protocol2/dev.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database2/protocol2/eval.csv b/bob/extension/data/test_list_folders/database2/protocol2/eval.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database2/protocol3/dev.csv b/bob/extension/data/test_list_folders/database2/protocol3/dev.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders/database2/protocol3/train.csv b/bob/extension/data/test_list_folders/database2/protocol3/train.csv
new file mode 100644
index 0000000..e69de29
diff --git a/bob/extension/data/test_list_folders1.tar.gz b/bob/extension/data/test_list_folders1.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..e85f6ac8df95097244a66c5d59a5b0bca58ff68a
GIT binary patch
literal 396
zcmb2|=3rn{wM$@NemnED*AWAew%;1HO@`{%w?5Fh`-4enxhS`+waLcw`AMt?FHVSi
zmQuXW)!BPN<ND|4m+mY5xAXm>r=>4umz9)XeYc?@P-|P0<c#-Q|77i7>0o3qshDwB
zl0xW<xYd8#i_W%xE?e{Z)Vy}{YP;LZe(#RIUh=KUP$A~e)c?yL|2_Qpqk+}_>(+}B
zbW?BHi%*hfJ5c#yz2)@Kv)`l~xIX<qU7x(ScH_F%yOR<E|8__Gk23Q7+V6Pkf2z!%
z0;ykX;}0?Y_gN9}e<7!#!#}|jpWnOR-Yf5y7xlI8=+E<(EfNd<xGS8mzwEdA?fQ@W
zrzAf0JNDK0&HV3JbEe+$XTQ_D|7Dy0{$hv-{O`ti)t;^L-^I`I?e0(H8>jsLsls3;
z&#LqNyuvg4pZjh9|Gsba`u~QE|ML5VZ~b39@l$-sU-c6pE!=DVYy1y9@i{)>ulwtI
zk)l7{LeKYq`On{K^Z&>5{gYnFx7z$?{WSksy-3i%=YQ?>H~(M$|38bPfcaD5f_+Wr
I7&I6d0FYAG&Hw-a

literal 0
HcmV?d00001

diff --git a/bob/extension/data/test_list_folders2.tar.gz b/bob/extension/data/test_list_folders2.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f58278aca45b91ebda4f391152b7adfb5c453946
GIT binary patch
literal 377
zcmb2|=3wYmwM$@Nemm1Q>#%`<TdBzW0~6hpGh;KR-TlIrm9s)|W%Jt~f3`ky+9SZq
zo~iX>Pp^T5O2sVy`mc^D?s^u<POFy&zutbd`ge1H##Cp9wQDALX>u<z`q{2_BzMlA
zfY3wnzx?ZW{#e%i|GMei{Xsk((|&!ZtEsHDzINE+=__T9Z5{i6v-ds_`^?Z?zagsA
z>zUMrY4z{-f1LdPL-OiPst4Eoy1ww=S0lk4|1@v>+Ryj*%KTr&uV!vIy#I>Y6??0K
zH}jiz{LlPwH@sHrxk~Mu^Yz&Q#s7i@zTe+cZ1mgzCwu3c`AswGf1LhP{%+p?S)b>(
z?D${y?Rg2~2baIu$vgjPcKp^?_|N-$dBW@Zy_^9j*E4<l?JoRj{@>@%AKHIi^*`bI
zPx~{%cK;`D{2G7hAHVAF<qGfiH~f`<ZqHfxpY_fEivRvjXZGto_!s{9w>(Jb&i|sn
qlLfxVJN*0n%m2`||5g9mmCpS?@%}&0VS~t@Z2xK(H!^52FaQ8|9MpUO

literal 0
HcmV?d00001

diff --git a/bob/extension/download.py b/bob/extension/download.py
index 0480dce..435a048 100644
--- a/bob/extension/download.py
+++ b/bob/extension/download.py
@@ -411,26 +411,63 @@ def search_file(base_path, options):
             return None
 
 
-def list_folders(base_path):
+def list_dir(base_path, inner_folder="", folders=True, files=True):
+    """Lists the files and folders inside a folder or a tarball.
+    To list an inner level folder (useful when base_path is a tarball),
+    provide the inner_folder argument.
+
+    Parameters
+    ----------
+    base_path : str
+        Path to a folder or a tarball
+    inner_folder : str
+        Path to an inner folder inside base_path. If given, the files and
+        folders inside this inner folder are listed instead.
+    folders : bool
+        If False, will exclude folders from the results.
+    files : bool
+        If False, will exclude files from the results.
+
+    Returns
+    -------
+    list
+        Sorted list of file and directory names
+
+    Raises
+    ------
+    ValueError
+        If base_path is not a folder or a tarball
+    """
     # If the input is a directory
     path = Path(base_path)
+    results = []
     if path.is_dir():
-        return sorted(x.name for x in path.iterdir() if x.is_dir())
-    # If it's not a directory is a tarball
+        path = path / inner_folder
+        for x in path.iterdir():
+            if x.is_dir() and folders:
+                results.append(x.name)
+            if x.is_file() and files:
+                results.append(x.name)
+
+    # If it's not a directory, is it a tarball?
     elif tarfile.is_tarfile(base_path):
         with tarfile.open(base_path, mode="r") as t:
             tar_infos = t.getmembers()
             commonpath = os.path.commonpath([info.name for info in tar_infos])
-            commonpath = Path(commonpath)
-            top_folders = []
+            commonpath = Path(commonpath) / inner_folder
             for info in tar_infos:
-                if not info.isdir():
+                if info.name == ".":
                     continue
                 path = Path(info.name)
-                if path.parent == commonpath:
-                    top_folders.append(path.name)
-            return sorted(top_folders)
+                if path.parent != commonpath:
+                    continue
+                if info.isdir() and folders:
+                    results.append(path.name)
+                if info.isfile() and files:
+                    results.append(path.name)
     else:
         raise ValueError(
             f"The provided path: `{base_path}` should be a directory or a tarball."
         )
+
+    return sorted(results)
diff --git a/bob/extension/test_download.py b/bob/extension/test_download.py
index 26b971c..27f18f5 100644
--- a/bob/extension/test_download.py
+++ b/bob/extension/test_download.py
@@ -3,12 +3,10 @@ import shutil
 import tempfile
 
 import pkg_resources
-from bob.extension import rc
 from bob.extension import rc_context
 from bob.extension.download import download_and_unzip
 from bob.extension.download import find_element_in_tarball, search_file, _untar
-from bob.extension.download import get_file
-import shutil
+from bob.extension.download import get_file, list_dir
 
 
 def test_download_unzip():
@@ -40,18 +38,28 @@ def test_get_file():
     ):
 
         final_filename = get_file(
-            filename, urls, cache_subdir="databases", file_hash=file_hash,
+            filename,
+            urls,
+            cache_subdir="databases",
+            file_hash=file_hash,
         )
         assert os.path.exists(final_filename)
 
         # Download again. to check the cache
         final_filename = get_file(
-            filename, urls, cache_subdir="databases", file_hash=file_hash,
+            filename,
+            urls,
+            cache_subdir="databases",
+            file_hash=file_hash,
         )
         assert os.path.exists(final_filename)
 
         # Download again, no hash. to check the cache
-        final_filename = get_file(filename, urls, cache_subdir="databases",)
+        final_filename = get_file(
+            filename,
+            urls,
+            cache_subdir="databases",
+        )
         assert os.path.exists(final_filename)
 
 
@@ -96,3 +104,35 @@ def test_search_file():
     assert search_file(final_path, "protocol_dev_eval/norm/xuxa.csv") is None
 
     shutil.rmtree(final_path)
+
+
+def test_list_dir():
+    data_folder = pkg_resources.resource_filename(__name__, "data")
+
+    folder = os.path.join(data_folder, "test_list_folders")
+    tar1 = os.path.join(data_folder, "test_list_folders1.tar.gz")
+    tar2 = os.path.join(data_folder, "test_list_folders2.tar.gz")
+
+    for root_folder in (folder, tar1, tar2):
+        fldrs = list_dir(root_folder)
+        assert fldrs == ["README.rst", "database1", "database2"], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, files=False)
+        assert fldrs == ["database1", "database2"], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, folders=False)
+        assert fldrs == ["README.rst"], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, folders=False, files=False)
+        assert fldrs == [], (fldrs, root_folder)
+
+        fldrs = list_dir(root_folder, "database1")
+        assert fldrs == ["README1.rst", "protocol1", "protocol2"], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, "database1", files=False)
+        assert fldrs == ["protocol1", "protocol2"], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, "database1", folders=False)
+        assert fldrs == ["README1.rst"], (fldrs, root_folder)
+
+        fldrs = list_dir(root_folder, "database1/protocol1")
+        assert fldrs == ["dev.csv", "train.csv"], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, "database1/protocol1", files=False)
+        assert fldrs == [], (fldrs, root_folder)
+        fldrs = list_dir(root_folder, "database1/protocol1", folders=False)
+        assert fldrs == ["dev.csv", "train.csv"], (fldrs, root_folder)
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 01177cc..d335423 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -50,6 +50,9 @@ Utilities
     bob.extension.utils.find_packages
     bob.extension.utils.link_documentation
     bob.extension.utils.load_requirements
+    bob.extension.download.get_file
+    bob.extension.download.search_file
+    bob.extension.download.list_dir
 
 Configuration
 ^^^^^^^^^^^^^
-- 
GitLab