Skip to content
Snippets Groups Projects
Commit 83c47f14 authored by Amir MOHAMMADI
Browse files

Merge branch 'improve-search_file' into 'master'

Improves the search_file function

Closes #88

See merge request !145
parents 7776b372 aeba4e05
No related branches found
No related tags found
1 merge request: !145 Improves the search_file function
Pipeline #62261 passed
No preview for this file type
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# vim: set fileencoding=utf-8 : # vim: set fileencoding=utf-8 :
import bz2 import bz2
import glob
import hashlib import hashlib
import io import io
import logging import logging
...@@ -347,21 +348,22 @@ def find_element_in_tarball(filename, target_path, open_as_stream=False): ...@@ -347,21 +348,22 @@ def find_element_in_tarball(filename, target_path, open_as_stream=False):
""" """
f = tarfile.open(filename) f = tarfile.open(filename)
for member in f.getmembers(): # iterate over the members of the tarball
if member.isdir(): while True:
member = f.next()
if member is None:
return None
if not member.isfile():
continue continue
if ( if not member.name.endswith(target_path):
member.isfile() continue
and target_path in member.name
and os.path.split(target_path)[-1] == os.path.split(member.name)[-1] if open_as_stream:
): return io.BufferedReader(f.extractfile(member)).read()
if open_as_stream: else:
return io.BufferedReader(f.extractfile(member)).read() return io.TextIOWrapper(f.extractfile(member), encoding="utf-8")
else:
return io.TextIOWrapper(f.extractfile(member), encoding="utf-8")
else:
return None
def search_file(base_path, options): def search_file(base_path, options):
...@@ -372,16 +374,18 @@ def search_file(base_path, options): ...@@ -372,16 +374,18 @@ def search_file(base_path, options):
---------- ----------
base_path: str base_path: str
Base path to start the search, or the tarball to be searched Base folder to start the search, or the tarball to be searched
options: list options: list
Files to be searched. This function will return the first occurency Files to be searched. This function will return the first occurrence.
The option can be an incomplete relative path. For example, if you have
a file called ``"/a/b/c/d.txt"``, and base_path is ``"/a/b"``, then
options can be ``["d.txt"]``.
Returns Returns
------- -------
object object
It returns an opened file It returns an opened file
""" """
if not isinstance(options, list): if not isinstance(options, list):
...@@ -389,26 +393,13 @@ def search_file(base_path, options): ...@@ -389,26 +393,13 @@ def search_file(base_path, options):
# If the input is a directory # If the input is a directory
if os.path.isdir(base_path): if os.path.isdir(base_path):
def get_fs():
fs = []
for root, _, files in os.walk(base_path, topdown=False):
for name in files:
fs.append(os.path.join(root, name))
return fs
def search_in_list(o, lst):
for i, l in enumerate(lst):
if o in l:
return i
else:
return -1
fs = get_fs()
for o in options: for o in options:
index = search_in_list(o, fs) # we append './' to o because o might start with /
if index >= 0: pattern = os.path.join(base_path, "**", f"./{o}")
return open(fs[index]) for path in glob.iglob(pattern, recursive=True):
if not os.path.isfile(path):
continue
return open(path)
else: else:
return None return None
else: else:
......
...@@ -97,3 +97,4 @@ def _saverc(context): ...@@ -97,3 +97,4 @@ def _saverc(context):
path = _get_rc_path() path = _get_rc_path()
with open(path, "wt") as f: with open(path, "wt") as f:
f.write(_rc_to_str(context)) f.write(_rc_to_str(context))
f.write("\n")
...@@ -97,27 +97,37 @@ def test_search_file(): ...@@ -97,27 +97,37 @@ def test_search_file():
filename = pkg_resources.resource_filename( filename = pkg_resources.resource_filename(
__name__, "data/example_csv_filelist.tar.gz" __name__, "data/example_csv_filelist.tar.gz"
) )
# Search in the tarball
assert (
search_file(filename, "protocol_dev_eval/norm/train_world.csv")
is not None
)
assert search_file(filename, "protocol_dev_eval/norm/xuxa.csv") is None
# Search in a file structure
final_path = "./test_search_file"
pass
_untar(filename, final_path, ".gz")
assert (
search_file(final_path, "protocol_dev_eval/norm/train_world.csv")
is not None
)
assert search_file(final_path, "protocol_dev_eval/norm/xuxa.csv") is None
shutil.rmtree(final_path) with tempfile.TemporaryDirectory(suffix="_extracted") as tmpdir:
_untar(filename, tmpdir, ".gz")
# Search in the tarball and in its extracted folder
for final_path in (filename, tmpdir):
in_extracted_folder = final_path.endswith("_extracted")
all_files = list_dir(final_path)
output_file = search_file(
final_path, "protocol_dev_eval/norm/train_world.csv"
)
assert output_file is not None, all_files
# test to see if using / we can force an exact match
output_file = search_file(
final_path, "/protocol_dev_eval/norm/train_world.csv"
)
assert output_file is not None, all_files
assert "my_data" not in output_file.read()
if in_extracted_folder:
assert "my_protocol" not in output_file.name
assert (
search_file(final_path, "norm/train_world.csv") is not None
), all_files
assert (
search_file(final_path, "protocol_dev_eval/norm/xuxa.csv")
is None
), all_files
def test_list_dir(): def test_list_dir():
......
...@@ -74,8 +74,14 @@ def test_bob_config(): ...@@ -74,8 +74,14 @@ def test_bob_config():
assert expected_output == result.output, result.output assert expected_output == result.output, result.output
# test config unset (with starting substring) # test config unset (with starting substring)
result = runner.invoke(main_cli, ["config", "unset", "bob.db.atnt"]) result = runner.invoke(
result = runner.invoke(main_cli, ["config", "get", "bob.db.atnt"]) main_cli,
["config", "unset", "bob.db.atnt"],
env={ENVNAME: bobrcfile},
)
result = runner.invoke(
main_cli, ["config", "get", "bob.db.atnt"], env={ENVNAME: bobrcfile}
)
assert_click_runner_result(result, 1) assert_click_runner_result(result, 1)
# test config unset (with substring contained) # test config unset (with substring contained)
...@@ -91,7 +97,11 @@ def test_bob_config(): ...@@ -91,7 +97,11 @@ def test_bob_config():
env={ENVNAME: bobrcfile}, env={ENVNAME: bobrcfile},
) )
result = runner.invoke( result = runner.invoke(
main_cli, ["config", "unset", "--contain", "atnt"] main_cli,
["config", "unset", "--contain", "atnt"],
env={ENVNAME: bobrcfile},
)
result = runner.invoke(
main_cli, ["config", "get", "bob.db.atnt"], env={ENVNAME: bobrcfile}
) )
result = runner.invoke(main_cli, ["config", "get", "bob.db.atnt"])
assert_click_runner_result(result, 1) assert_click_runner_result(result, 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment