From 5b856408451642c6278b461744ad1fbe21ace97f Mon Sep 17 00:00:00 2001 From: Amir MOHAMMADI <amir.mohammadi@idiap.ch> Date: Tue, 2 Mar 2021 13:09:23 +0100 Subject: [PATCH] [download] rename list_folders to list_dir Add documentation and tests --- MANIFEST.in | 1 + .../data/test_list_folders/README.rst | 0 .../test_list_folders/database1/README1.rst | 0 .../database1/protocol1/dev.csv | 0 .../database1/protocol1/train.csv | 0 .../database1/protocol2/dev.csv | 0 .../database1/protocol2/eval.csv | 0 .../test_list_folders/database2/README2.rst | 0 .../database2/protocol2/dev.csv | 0 .../database2/protocol2/eval.csv | 0 .../database2/protocol3/dev.csv | 0 .../database2/protocol3/train.csv | 0 bob/extension/data/test_list_folders1.tar.gz | Bin 0 -> 396 bytes bob/extension/data/test_list_folders2.tar.gz | Bin 0 -> 377 bytes bob/extension/download.py | 55 +++++++++++++++--- bob/extension/test_download.py | 52 +++++++++++++++-- doc/py_api.rst | 3 + 17 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 bob/extension/data/test_list_folders/README.rst create mode 100644 bob/extension/data/test_list_folders/database1/README1.rst create mode 100644 bob/extension/data/test_list_folders/database1/protocol1/dev.csv create mode 100644 bob/extension/data/test_list_folders/database1/protocol1/train.csv create mode 100644 bob/extension/data/test_list_folders/database1/protocol2/dev.csv create mode 100644 bob/extension/data/test_list_folders/database1/protocol2/eval.csv create mode 100644 bob/extension/data/test_list_folders/database2/README2.rst create mode 100644 bob/extension/data/test_list_folders/database2/protocol2/dev.csv create mode 100644 bob/extension/data/test_list_folders/database2/protocol2/eval.csv create mode 100644 bob/extension/data/test_list_folders/database2/protocol3/dev.csv create mode 100644 bob/extension/data/test_list_folders/database2/protocol3/train.csv create mode 100644 bob/extension/data/test_list_folders1.tar.gz create mode 100644 
bob/extension/data/test_list_folders2.tar.gz diff --git a/MANIFEST.in b/MANIFEST.in index fd1d31f..50784c5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,3 +2,4 @@ include LICENSE README.rst buildout.cfg version.txt recursive-include doc conf.py *.rst recursive-include bob *.cpp *.h recursive-include bob/extension/data * +global-exclude *.py[cod] diff --git a/bob/extension/data/test_list_folders/README.rst b/bob/extension/data/test_list_folders/README.rst new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database1/README1.rst b/bob/extension/data/test_list_folders/database1/README1.rst new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database1/protocol1/dev.csv b/bob/extension/data/test_list_folders/database1/protocol1/dev.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database1/protocol1/train.csv b/bob/extension/data/test_list_folders/database1/protocol1/train.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database1/protocol2/dev.csv b/bob/extension/data/test_list_folders/database1/protocol2/dev.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database1/protocol2/eval.csv b/bob/extension/data/test_list_folders/database1/protocol2/eval.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database2/README2.rst b/bob/extension/data/test_list_folders/database2/README2.rst new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database2/protocol2/dev.csv b/bob/extension/data/test_list_folders/database2/protocol2/dev.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database2/protocol2/eval.csv b/bob/extension/data/test_list_folders/database2/protocol2/eval.csv new file mode 100644 index 
0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database2/protocol3/dev.csv b/bob/extension/data/test_list_folders/database2/protocol3/dev.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders/database2/protocol3/train.csv b/bob/extension/data/test_list_folders/database2/protocol3/train.csv new file mode 100644 index 0000000..e69de29 diff --git a/bob/extension/data/test_list_folders1.tar.gz b/bob/extension/data/test_list_folders1.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e85f6ac8df95097244a66c5d59a5b0bca58ff68a GIT binary patch literal 396 zcmb2|=3rn{wM$@NemnED*AWAew%;1HO@`{%w?5Fh`-4enxhS`+waLcw`AMt?FHVSi zmQuXW)!BPN<ND|4m+mY5xAXm>r=>4umz9)XeYc?@P-|P0<c#-Q|77i7>0o3qshDwB zl0xW<xYd8#i_W%xE?e{Z)Vy}{YP;LZe(#RIUh=KUP$A~e)c?yL|2_Qpqk+}_>(+}B zbW?BHi%*hfJ5c#yz2)@Kv)`l~xIX<qU7x(ScH_F%yOR<E|8__Gk23Q7+V6Pkf2z!% z0;ykX;}0?Y_gN9}e<7!#!#}|jpWnOR-Yf5y7xlI8=+E<(EfNd<xGS8mzwEdA?fQ@W zrzAf0JNDK0&HV3JbEe+$XTQ_D|7Dy0{$hv-{O`ti)t;^L-^I`I?e0(H8>jsLsls3; z&#LqNyuvg4pZjh9|Gsba`u~QE|ML5VZ~b39@l$-sU-c6pE!=DVYy1y9@i{)>ulwtI zk)l7{LeKYq`On{K^Z&>5{gYnFx7z$?{WSksy-3i%=YQ?>H~(M$|38bPfcaD5f_+Wr I7&I6d0FYAG&Hw-a literal 0 HcmV?d00001 diff --git a/bob/extension/data/test_list_folders2.tar.gz b/bob/extension/data/test_list_folders2.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f58278aca45b91ebda4f391152b7adfb5c453946 GIT binary patch literal 377 zcmb2|=3wYmwM$@Nemm1Q>#%`<TdBzW0~6hpGh;KR-TlIrm9s)|W%Jt~f3`ky+9SZq zo~iX>Pp^T5O2sVy`mc^D?s^u<POFy&zutbd`ge1H##Cp9wQDALX>u<z`q{2_BzMlA zfY3wnzx?ZW{#e%i|GMei{Xsk((|&!ZtEsHDzINE+=__T9Z5{i6v-ds_`^?Z?zagsA z>zUMrY4z{-f1LdPL-OiPst4Eoy1ww=S0lk4|1@v>+Ryj*%KTr&uV!vIy#I>Y6??0K zH}jiz{LlPwH@sHrxk~Mu^Yz&Q#s7i@zTe+cZ1mgzCwu3c`AswGf1LhP{%+p?S)b>( z?D${y?Rg2~2baIu$vgjPcKp^?_|N-$dBW@Zy_^9j*E4<l?JoRj{@>@%AKHIi^*`bI zPx~{%cK;`D{2G7hAHVAF<qGfiH~f`<ZqHfxpY_fEivRvjXZGto_!s{9w>(Jb&i|sn qlLfxVJN*0n%m2`||5g9mmCpS?@%}&0VS~t@Z2xK(H!^52FaQ8|9MpUO literal 0 
def list_dir(base_path, inner_folder="", folders=True, files=True):
    """Lists the files and folders inside a folder or a tarball.

    To list an inner level folder (useful when base_path is a tarball),
    provide the inner_folder argument.

    For tarballs, the listing is relative to the common leading path of all
    members, so an archive that wraps everything in one top-level folder is
    listed the same way as an archive of that folder's contents.

    Parameters
    ----------
    base_path : str
        Path to a folder or a tarball
    inner_folder : str
        Path to an inner folder inside base_path. If given, the files and
        folders inside this inner folder are listed instead of the top level.
    folders : bool
        If False, will exclude folders from the results.
    files : bool
        If False, will exclude files from the results.

    Returns
    -------
    list
        Sorted list of file and directory names (names only, not paths).
        Entries that are neither regular files nor directories are omitted.

    Raises
    ------
    ValueError
        If base_path is not a folder or a tarball
    """
    base = Path(base_path)

    # Plain directory on disk
    if base.is_dir():
        target = base / inner_folder
        return sorted(
            entry.name
            for entry in target.iterdir()
            if (folders and entry.is_dir()) or (files and entry.is_file())
        )

    # If it's not a directory, is it a tarball?
    if not tarfile.is_tarfile(base_path):
        raise ValueError(
            f"The provided path: `{base_path}` should be a directory or a tarball."
        )

    with tarfile.open(base_path, mode="r") as archive:
        members = archive.getmembers()

    # An empty archive has nothing to list; os.path.commonpath would raise
    # an unrelated ValueError on an empty sequence.
    if not members:
        return []

    root = Path(os.path.commonpath([member.name for member in members]))
    # When the archive holds a single file, the common path is that file
    # itself; list its parent folder instead so the file is reported.
    if any(not m.isdir() and Path(m.name) == root for m in members):
        root = root.parent
    target = root / inner_folder

    # A set is used because a folder can be seen several times: once as an
    # explicit directory member and once per member located below it.
    found = set()
    for member in members:
        try:
            relative = Path(member.name).relative_to(target)
        except ValueError:
            # the member lives outside the requested folder
            continue
        parts = relative.parts
        if not parts:
            # the requested folder itself (or the "." member)
            continue
        if len(parts) > 1 or member.isdir():
            # Either an explicit directory entry, or a deeper member that
            # implies a directory. Many tarballs contain no directory
            # entries at all, so folders must be inferred from file paths.
            if folders:
                found.add(parts[0])
        elif files and member.isfile():
            found.add(parts[0])

    return sorted(found)
def test_list_dir():
    """Checks list_dir against a plain folder and two tarballs.

    The same listings are expected from all three inputs, at the top level
    and for two nested ``inner_folder`` values.
    """
    data_folder = pkg_resources.resource_filename(__name__, "data")
    roots = tuple(
        os.path.join(data_folder, name)
        for name in (
            "test_list_folders",
            "test_list_folders1.tar.gz",
            "test_list_folders2.tar.gz",
        )
    )

    # (positional args after the root, keyword args, expected listing)
    cases = (
        ((), {}, ["README.rst", "database1", "database2"]),
        ((), {"files": False}, ["database1", "database2"]),
        ((), {"folders": False}, ["README.rst"]),
        ((), {"folders": False, "files": False}, []),
        (("database1",), {}, ["README1.rst", "protocol1", "protocol2"]),
        (("database1",), {"files": False}, ["protocol1", "protocol2"]),
        (("database1",), {"folders": False}, ["README1.rst"]),
        (("database1/protocol1",), {}, ["dev.csv", "train.csv"]),
        (("database1/protocol1",), {"files": False}, []),
        (("database1/protocol1",), {"folders": False}, ["dev.csv", "train.csv"]),
    )

    for root_folder in roots:
        for args, kwargs, expected in cases:
            fldrs = list_dir(root_folder, *args, **kwargs)
            assert fldrs == expected, (fldrs, root_folder)
bob.extension.utils.find_packages bob.extension.utils.link_documentation bob.extension.utils.load_requirements + bob.extension.download.get_file + bob.extension.download.search_file + bob.extension.download.list_dir Configuration ^^^^^^^^^^^^^ -- GitLab