# Patch summary (preamble only; the patch text that follows is unchanged):
#
# * preprocessor/Preprocessor.py: ``Preprocessor.__init__`` gains a
#   ``writes_data`` parameter (default ``True``) and stores it as
#   ``self.writes_data``.
# * preprocessor/Filename.py: ``Filename.__init__`` now calls
#   ``Preprocessor.__init__(self, writes_data=False)`` instead of ``pass``.
# * tools/extractor.py: in ``extract``, the "Cannot find preprocessed data
#   file" branch is entered only when the data file is absent AND
#   ``preprocessor.writes_data`` is true.
# * tools/preprocessor.py: ``preprocess`` logs an info message and returns
#   immediately when ``preprocessor.writes_data`` is false.
#   ``read_preprocessed_data`` passes
#   ``allow_missing_files and preprocessor.writes_data`` to
#   ``utils.filter_missing_files`` and returns its result through
#   ``utils.filter_none``.
# * utils/io.py: adds ``filter_none(data, split_by_client=False)``, which
#   drops ``None`` entries and, when ``split_by_client`` is set, also drops
#   clients whose list is empty after filtering.
#
# NOTE(review): ``read_preprocessed_data`` calls ``utils.filter_none``
#   unconditionally, i.e. also when ``allow_missing_files`` is false, so the
#   result can be shorter than ``file_names`` -- confirm this is intended,
#   given the docstring says "in the same order as in the ``file_names``".
# NOTE(review): ``writes_data`` is assigned only in ``Preprocessor.__init__``.
#   A subclass that does not call the base constructor (as ``Filename`` did
#   before this patch) would presumably lack the attribute that ``extract``,
#   ``preprocess`` and ``read_preprocessed_data`` now read -- verify the
#   other subclasses.
# NOTE(review): the line structure of this patch looks collapsed (hunks run
#   together on a few physical lines) -- TODO confirm against the original
#   commit before applying.
diff --git a/bob/bio/base/preprocessor/Filename.py b/bob/bio/base/preprocessor/Filename.py index 8d2f8ba0795c3a2c6325cda505399419fb421f80..a1ab0f6448d125865bfcf5253ed2ef6a790472cf 100644 --- a/bob/bio/base/preprocessor/Filename.py +++ b/bob/bio/base/preprocessor/Filename.py @@ -27,7 +27,7 @@ class Filename (Preprocessor): """ def __init__(self): - pass + Preprocessor.__init__(self, writes_data=False) # The call function (i.e. the operator() in C++ terms) diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py index a01026a72a3ed2f976b0b75e37d1a005af05afa2..f5a7e16afcb160cc5680f19a5a012fd580a1d1ae 100644 --- a/bob/bio/base/preprocessor/Preprocessor.py +++ b/bob/bio/base/preprocessor/Preprocessor.py @@ -33,9 +33,10 @@ class Preprocessor: A list of keyword arguments to be written in the :py:meth:`__str__` function. """ - def __init__(self, **kwargs): + def __init__(self, writes_data = True, **kwargs): # Each class needs to have a constructor taking # all the parameters that are required for the preprocessing as arguments + self.writes_data = writes_data self._kwargs = kwargs pass diff --git a/bob/bio/base/tools/extractor.py b/bob/bio/base/tools/extractor.py index 5872a4ba11aba49e8a3674814f63c9e75721bb60..a28016af45925dce79f2cdf01a93f34262b2f57e 100644 --- a/bob/bio/base/tools/extractor.py +++ b/bob/bio/base/tools/extractor.py @@ -102,7 +102,7 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_ data_file = data_files[i] feature_file = feature_files[i] - if not os.path.exists(data_file): + if not os.path.exists(data_file) and preprocessor.writes_data: if allow_missing_files: logger.debug("... 
Cannot find preprocessed data file %s; skipping", data_file) continue diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py index d28b2852b47f4601351b5484b32f19e3d558f15a..01dea478a7976255216d159ba4d1653d9c1e05b5 100644 --- a/bob/bio/base/tools/preprocessor.py +++ b/bob/bio/base/tools/preprocessor.py @@ -33,6 +33,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files force : bool If given, files are regenerated, even if they already exist. """ + if not preprocessor.writes_data: + # The preprocessor does not write anything, so no need to call it + logger.info("Skipping preprocessing as preprocessor does not write any data") + return + # the file selector object fs = FileSelector.instance() @@ -110,9 +115,10 @@ def read_preprocessed_data(file_names, preprocessor, split_by_client = False, al preprocessed : [object] or [[object]] The list of preprocessed data, in the same order as in the ``file_names``. """ - file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files) + file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files and preprocessor.writes_data) if split_by_client: - return [[preprocessor.read_data(f) for f in client_files] for client_files in file_names] + preprocessed = [[preprocessor.read_data(f) for f in client_files] for client_files in file_names] else: - return [preprocessor.read_data(f) for f in file_names] + preprocessed = [preprocessor.read_data(f) for f in file_names] + return utils.filter_none(preprocessed, split_by_client) diff --git a/bob/bio/base/utils/io.py b/bob/bio/base/utils/io.py index 1e2851bcf9d79e092936e1177c9f9438e2b9131d..1e73f499437ef64a82c493128f7596b8c0ad7cca 100644 --- a/bob/bio/base/utils/io.py +++ b/bob/bio/base/utils/io.py @@ -22,6 +22,21 @@ def filter_missing_files(file_names, split_by_client=False, allow_missing_files= return existing_files +def filter_none(data, split_by_client=False): + """This 
function filters out ``None`` values from the given list (or list of lists, when ``split_by_client`` is enabled).""" + + if split_by_client: + # filter out missing files and empty clients + existing_data = [[d for d in client_data if d is not None] for client_data in data] + existing_data = [client_data for client_data in existing_data if client_data] + else: + # filter out missing files + existing_data = [d for d in data if d is not None] + return existing_data + + + + def check_file(filename, force, expected_file_size = 1): """Checks if the file with the given ``filename`` exists and has size greater or equal to ``expected_file_size``. If the file is to small, **or** if the ``force`` option is set to ``True``, the file is removed.