Skip to content
Snippets Groups Projects
Commit b6c65c06 authored by Manuel Günther's avatar Manuel Günther
Browse files

Implemented the Preprocessor.writes_data flag so that preprocessing can be skipped more easily

parent 9a25a354
No related branches found
No related tags found
No related merge requests found
...@@ -27,7 +27,7 @@ class Filename (Preprocessor): ...@@ -27,7 +27,7 @@ class Filename (Preprocessor):
""" """
def __init__(self): def __init__(self):
pass Preprocessor.__init__(self, writes_data=False)
# The call function (i.e. the operator() in C++ terms) # The call function (i.e. the operator() in C++ terms)
......
...@@ -33,9 +33,10 @@ class Preprocessor: ...@@ -33,9 +33,10 @@ class Preprocessor:
A list of keyword arguments to be written in the :py:meth:`__str__` function. A list of keyword arguments to be written in the :py:meth:`__str__` function.
""" """
def __init__(self, **kwargs): def __init__(self, writes_data = True, **kwargs):
# Each class needs to have a constructor taking # Each class needs to have a constructor taking
# all the parameters that are required for the preprocessing as arguments # all the parameters that are required for the preprocessing as arguments
self.writes_data = writes_data
self._kwargs = kwargs self._kwargs = kwargs
pass pass
......
...@@ -102,7 +102,7 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_ ...@@ -102,7 +102,7 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_
data_file = data_files[i] data_file = data_files[i]
feature_file = feature_files[i] feature_file = feature_files[i]
if not os.path.exists(data_file): if not os.path.exists(data_file) and preprocessor.writes_data:
if allow_missing_files: if allow_missing_files:
logger.debug("... Cannot find preprocessed data file %s; skipping", data_file) logger.debug("... Cannot find preprocessed data file %s; skipping", data_file)
continue continue
......
...@@ -33,6 +33,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files ...@@ -33,6 +33,11 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
force : bool force : bool
If given, files are regenerated, even if they already exist. If given, files are regenerated, even if they already exist.
""" """
if not preprocessor.writes_data:
# The preprocessor does not write anything, so no need to call it
logger.info("Skipping preprocessing as preprocessor does not write any data")
return
# the file selector object # the file selector object
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -110,9 +115,10 @@ def read_preprocessed_data(file_names, preprocessor, split_by_client = False, al ...@@ -110,9 +115,10 @@ def read_preprocessed_data(file_names, preprocessor, split_by_client = False, al
preprocessed : [object] or [[object]] preprocessed : [object] or [[object]]
The list of preprocessed data, in the same order as in the ``file_names``. The list of preprocessed data, in the same order as in the ``file_names``.
""" """
file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files) file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files and preprocessor.writes_data)
if split_by_client: if split_by_client:
return [[preprocessor.read_data(f) for f in client_files] for client_files in file_names] preprocessed = [[preprocessor.read_data(f) for f in client_files] for client_files in file_names]
else: else:
return [preprocessor.read_data(f) for f in file_names] preprocessed = [preprocessor.read_data(f) for f in file_names]
return utils.filter_none(preprocessed, split_by_client)
...@@ -22,6 +22,21 @@ def filter_missing_files(file_names, split_by_client=False, allow_missing_files= ...@@ -22,6 +22,21 @@ def filter_missing_files(file_names, split_by_client=False, allow_missing_files=
return existing_files return existing_files
def filter_none(data, split_by_client=False):
    """Remove ``None`` entries from ``data``.

    Only values that are identical to ``None`` are dropped; other falsy
    values (``0``, ``""``, empty containers, ...) are kept.

    Parameters
    ----------
    data : [object] or [[object]]
        A flat list of items, or -- when ``split_by_client`` is enabled --
        a list holding one list of items per client.
    split_by_client : bool
        If ``True``, ``data`` is treated as a list of per-client lists.
        ``None`` entries are removed inside each client list, and client
        lists that end up empty are removed as well.

    Returns
    -------
    existing_data : [object] or [[object]]
        A new list with the same nesting as ``data`` and the original
        order preserved; ``data`` itself is not modified.
    """
    if not split_by_client:
        # flat list: simply drop the missing entries
        return [d for d in data if d is not None]

    # drop missing entries per client ...
    per_client = ([d for d in client_data if d is not None] for client_data in data)
    # ... and afterwards drop the clients that have no entries left
    return [client_data for client_data in per_client if client_data]
def check_file(filename, force, expected_file_size = 1): def check_file(filename, force, expected_file_size = 1):
"""Checks if the file with the given ``filename`` exists and has size greater or equal to ``expected_file_size``. """Checks if the file with the given ``filename`` exists and has size greater or equal to ``expected_file_size``.
If the file is to small, **or** if the ``force`` option is set to ``True``, the file is removed. If the file is to small, **or** if the ``force`` option is set to ``True``, the file is removed.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment