diff --git a/MANIFEST.in b/MANIFEST.in index fd1d31f9fcee8ea07f81560b87e65aa131ca2436..50784c5c927d81d776c266209c285d958b88d2d1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,3 +2,4 @@ include LICENSE README.rst buildout.cfg version.txt recursive-include doc conf.py *.rst recursive-include bob *.cpp *.h recursive-include bob/extension/data * +global-exclude *.py[cod] diff --git a/bob/extension/data/test_list_folders/README.rst b/bob/extension/data/test_list_folders/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database1/README1.rst b/bob/extension/data/test_list_folders/database1/README1.rst new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database1/protocol1/dev.csv b/bob/extension/data/test_list_folders/database1/protocol1/dev.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database1/protocol1/train.csv b/bob/extension/data/test_list_folders/database1/protocol1/train.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database1/protocol2/dev.csv b/bob/extension/data/test_list_folders/database1/protocol2/dev.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database1/protocol2/eval.csv b/bob/extension/data/test_list_folders/database1/protocol2/eval.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database2/README2.rst b/bob/extension/data/test_list_folders/database2/README2.rst new 
file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database2/protocol2/dev.csv b/bob/extension/data/test_list_folders/database2/protocol2/dev.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database2/protocol2/eval.csv b/bob/extension/data/test_list_folders/database2/protocol2/eval.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database2/protocol3/dev.csv b/bob/extension/data/test_list_folders/database2/protocol3/dev.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders/database2/protocol3/train.csv b/bob/extension/data/test_list_folders/database2/protocol3/train.csv new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/extension/data/test_list_folders1.tar.gz b/bob/extension/data/test_list_folders1.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..e85f6ac8df95097244a66c5d59a5b0bca58ff68a Binary files /dev/null and b/bob/extension/data/test_list_folders1.tar.gz differ diff --git a/bob/extension/data/test_list_folders2.tar.gz b/bob/extension/data/test_list_folders2.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f58278aca45b91ebda4f391152b7adfb5c453946 Binary files /dev/null and b/bob/extension/data/test_list_folders2.tar.gz differ diff --git a/bob/extension/download.py b/bob/extension/download.py index 0480dce717e1936afd3056d85cfa5fb4d5226da8..435a048631349da7e143c687269504ba610dc478 100644 --- a/bob/extension/download.py +++ b/bob/extension/download.py @@ -411,26 +411,63 @@ def search_file(base_path, 
def list_dir(base_path, inner_folder="", folders=True, files=True):
    """Lists the files and folders inside a folder or a tarball.

    To list an inner level folder (useful when base_path is a tarball),
    provide the inner_folder argument.

    For a tarball, entries are resolved relative to the deepest directory
    shared by all of its members. An archive that wraps its content in a
    single top-level folder therefore lists the same entries as an archive
    created from inside that folder.

    Parameters
    ----------
    base_path : str
        Path to a folder or a tarball
    inner_folder : str
        Path to an inner folder inside base_path. If given, the content of
        this folder is listed instead of the top level.
    folders : bool
        If False, will exclude folders from the results.
    files : bool
        If False, will exclude files from the results.

    Returns
    -------
    list
        Sorted list of file and directory names. Empty for an empty tarball.

    Raises
    ------
    ValueError
        If base_path is not a folder or a tarball
    """
    base = Path(base_path)

    # Plain directory on disk: list the requested (inner) folder directly.
    if base.is_dir():
        target = base / inner_folder
        return sorted(
            entry.name
            for entry in target.iterdir()
            if (folders and entry.is_dir()) or (files and entry.is_file())
        )

    if not tarfile.is_tarfile(base_path):
        raise ValueError(
            f"The provided path: `{base_path}` should be a directory or a tarball."
        )

    with tarfile.open(base_path, mode="r") as t:
        tar_infos = t.getmembers()

    # An empty archive has nothing to list; os.path.commonpath([]) would raise
    # a ValueError unrelated to the one documented above.
    if not tar_infos:
        return []

    # Root the listing at the deepest *directory* common to all members.
    # Non-directory members contribute their parent directory; otherwise an
    # archive holding a single file would use the file itself as the root and
    # nothing would ever be listed.
    member_dirs = [
        info.name if info.isdir() else os.path.dirname(info.name)
        for info in tar_infos
    ]
    listed_dir = Path(os.path.commonpath(member_dirs)) / inner_folder

    results = []
    for info in tar_infos:
        # The archive root itself ("./") has no name to report.
        if info.name == ".":
            continue
        member = Path(info.name)
        if member.parent != listed_dir:
            continue
        if (folders and info.isdir()) or (files and info.isfile()):
            results.append(member.name)

    return sorted(results)
def test_list_dir():
    """Checks list_dir against a plain folder and two tarballs.

    The three inputs live in the package ``data`` folder and every one of them
    is expected to produce the same listings for each combination of
    ``inner_folder``, ``folders`` and ``files`` exercised below.
    """
    data_folder = pkg_resources.resource_filename(__name__, "data")
    roots = tuple(
        os.path.join(data_folder, name)
        for name in (
            "test_list_folders",
            "test_list_folders1.tar.gz",
            "test_list_folders2.tar.gz",
        )
    )

    # (positional args after the root, keyword args, expected listing)
    cases = (
        # top level
        ((), {}, ["README.rst", "database1", "database2"]),
        ((), {"files": False}, ["database1", "database2"]),
        ((), {"folders": False}, ["README.rst"]),
        ((), {"folders": False, "files": False}, []),
        # one level down
        (("database1",), {}, ["README1.rst", "protocol1", "protocol2"]),
        (("database1",), {"files": False}, ["protocol1", "protocol2"]),
        (("database1",), {"folders": False}, ["README1.rst"]),
        # two levels down: only files live here
        (("database1/protocol1",), {}, ["dev.csv", "train.csv"]),
        (("database1/protocol1",), {"files": False}, []),
        (("database1/protocol1",), {"folders": False}, ["dev.csv", "train.csv"]),
    )

    for root_folder in roots:
        for args, kwargs, expected in cases:
            fldrs = list_dir(root_folder, *args, **kwargs)
            assert fldrs == expected, (fldrs, root_folder)
a/doc/py_api.rst +++ b/doc/py_api.rst @@ -50,6 +50,9 @@ Utilities bob.extension.utils.find_packages bob.extension.utils.link_documentation bob.extension.utils.load_requirements + bob.extension.download.get_file + bob.extension.download.search_file + bob.extension.download.list_dir Configuration ^^^^^^^^^^^^^