From b6c65c06ee75a33d357675c7e3d69d2ca20bd733 Mon Sep 17 00:00:00 2001 From: Manuel Gunther <siebenkopf@googlemail.com> Date: Tue, 17 May 2016 13:00:12 -0600 Subject: [PATCH] Implemented Preprocessor.writes_data flag so that preprocessing can easier be skipped --- bob/bio/base/preprocessor/Filename.py | 2 +- bob/bio/base/preprocessor/Preprocessor.py | 3 ++- bob/bio/base/tools/extractor.py | 2 +- bob/bio/base/tools/preprocessor.py | 12 +++++++++--- bob/bio/base/utils/io.py | 15 +++++++++++++++ 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/bob/bio/base/preprocessor/Filename.py b/bob/bio/base/preprocessor/Filename.py index 8d2f8ba0..a1ab0f64 100644 --- a/bob/bio/base/preprocessor/Filename.py +++ b/bob/bio/base/preprocessor/Filename.py @@ -27,7 +27,7 @@ class Filename (Preprocessor): """ def __init__(self): - pass + Preprocessor.__init__(self, writes_data=False) # The call function (i.e. the operator() in C++ terms) diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py index a01026a7..f5a7e16a 100644 --- a/bob/bio/base/preprocessor/Preprocessor.py +++ b/bob/bio/base/preprocessor/Preprocessor.py @@ -33,9 +33,10 @@ class Preprocessor: A list of keyword arguments to be written in the :py:meth:`__str__` function. """ - def __init__(self, **kwargs): + def __init__(self, writes_data = True, **kwargs): # Each class needs to have a constructor taking # all the parameters that are required for the preprocessing as arguments + self.writes_data = writes_data self._kwargs = kwargs pass diff --git a/bob/bio/base/tools/extractor.py b/bob/bio/base/tools/extractor.py index 5872a4ba..a28016af 100644 --- a/bob/bio/base/tools/extractor.py +++ b/bob/bio/base/tools/extractor.py @@ -102,7 +102,7 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_ data_file = data_files[i] feature_file = feature_files[i] - if not os.path.exists(data_file): + if not os.path.exists(data_file) and preprocessor.writes_data: if allow_missing_files: logger.debug("... Cannot find preprocessed data file %s; skipping", data_file) continue diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py index d28b2852..01dea478 100644 --- a/bob/bio/base/tools/preprocessor.py +++ b/bob/bio/base/tools/preprocessor.py @@ -33,6 +33,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files force : bool If given, files are regenerated, even if they already exist. """ + if not preprocessor.writes_data: + # The preprocessor does not write anything, so no need to call it + logger.info("Skipping preprocessing as preprocessor does not write any data") + return + # the file selector object fs = FileSelector.instance() @@ -110,9 +115,10 @@ def read_preprocessed_data(file_names, preprocessor, split_by_client = False, al preprocessed : [object] or [[object]] The list of preprocessed data, in the same order as in the ``file_names``. """ - file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files) + file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files and preprocessor.writes_data) if split_by_client: - return [[preprocessor.read_data(f) for f in client_files] for client_files in file_names] + preprocessed = [[preprocessor.read_data(f) for f in client_files] for client_files in file_names] else: - return [preprocessor.read_data(f) for f in file_names] + preprocessed = [preprocessor.read_data(f) for f in file_names] + return utils.filter_none(preprocessed, split_by_client) diff --git a/bob/bio/base/utils/io.py b/bob/bio/base/utils/io.py index 1e2851bc..1e73f499 100644 --- a/bob/bio/base/utils/io.py +++ b/bob/bio/base/utils/io.py @@ -22,6 +22,21 @@ def filter_missing_files(file_names, split_by_client=False, allow_missing_files= return existing_files +def filter_none(data, split_by_client=False): + """This function filters out ``None`` values from the given list (or list of lists, when ``split_by_client`` is enabled).""" + + if split_by_client: + # filter out missing files and empty clients + existing_data = [[d for d in client_data if d is not None] for client_data in data] + existing_data = [client_data for client_data in existing_data if client_data] + else: + # filter out missing files + existing_data = [d for d in data if d is not None] + return existing_data + + + + def check_file(filename, force, expected_file_size = 1): """Checks if the file with the given ``filename`` exists and has size greater or equal to ``expected_file_size``. If the file is to small, **or** if the ``force`` option is set to ``True``, the file is removed. -- GitLab