diff --git a/bob/bio/base/algorithm/Algorithm.py b/bob/bio/base/algorithm/Algorithm.py
index 9cc06732f7b8ac35f1bbaaafda189cc5db1f8e37..fec131a4e295e2443b755b3185f497600ad14757 100644
--- a/bob/bio/base/algorithm/Algorithm.py
+++ b/bob/bio/base/algorithm/Algorithm.py
@@ -46,6 +46,29 @@ class Algorithm:
     The way, scores are fused when multiple probes are available.
     See :py:func:`bob.bio.base.score_fusion_strategy` for possible values.
 
+  min_projector_file_size : int
+    The minimum file size of projector_file in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and it will be generated
+    again.
+
+  min_projected_file_size : int
+    The minimum file size of projected_file in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and it will be generated
+    again.
+
+  min_enroller_file_size : int
+    The minimum file size of enroller_file in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and it will be generated
+    again.
+
+  min_model_file_size : int
+    The minimum file size of model_file in bytes. If the saved file is smaller
+    than this, it is assumed to be corrupt and it will be generated again.
+
+  min_t_model_file_size : int
+    The minimum file size of t_model_file in bytes. If the saved file is smaller
+    than this, it is assumed to be corrupt and it will be generated again.
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the `__str__` function.
 
@@ -61,6 +84,11 @@ class Algorithm:
       multiple_model_scoring = 'average', # by default, compute the average between several models and the probe
       multiple_probe_scoring = 'average', # by default, compute the average between the model and several probes
+      min_projector_file_size=1000,
+      min_projected_file_size=1000,
+      min_enroller_file_size=1000,
+      min_model_file_size=1000,
+      min_t_model_file_size=1000,
       **kwargs # parameters from the derived class that should be reported in the __str__() function
   ):
     self.performs_projection = performs_projection
@@ -70,6 +98,11 @@ class Algorithm:
     self.requires_enroller_training = requires_enroller_training
     self.model_fusion_function = utils.score_fusion_strategy(multiple_model_scoring)
     self.probe_fusion_function = utils.score_fusion_strategy(multiple_probe_scoring)
+    self.min_projector_file_size = min_projector_file_size
+    self.min_projected_file_size = min_projected_file_size
+    self.min_enroller_file_size = min_enroller_file_size
+    self.min_model_file_size = min_model_file_size
+    self.min_t_model_file_size = min_t_model_file_size
     self._kwargs = kwargs
     self._kwargs.update({'multiple_model_scoring':multiple_model_scoring, 'multiple_probe_scoring':multiple_probe_scoring})
diff --git a/bob/bio/base/extractor/Extractor.py b/bob/bio/base/extractor/Extractor.py
index b4c238d203e4a3111404a9c9ae2ce86aad2c73fa..251d6283e2a1f2e68b9e364726379e7f79fc32bc 100644
--- a/bob/bio/base/extractor/Extractor.py
+++ b/bob/bio/base/extractor/Extractor.py
@@ -23,6 +23,16 @@ class Extractor:
     Set this flag to ``True`` if your feature extractor requires the training data to be split by clients.
     Ignored, if ``requires_training`` is ``False``
 
+  min_extractor_file_size : int
+    The minimum file size, in bytes, of a saved extractor file (for
+    extractors that require training). If the saved file is smaller than
+    this, it is assumed to be corrupt and the extractor will be trained again.
+
+  min_feature_file_size : int
+    The minimum file size of extracted features in bytes. If the saved file
+    size is smaller than this, it is assumed to be a corrupt file and the
+    features will be extracted again.
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the `__str__` function.
   """
@@ -31,12 +41,16 @@ class Extractor:
     self,
     requires_training = False, # enable, if your extractor needs training
     split_training_data_by_client = False, # enable, if your extractor needs the training files sorted by client
+    min_extractor_file_size=1000,
+    min_feature_file_size=1000,
     **kwargs # the parameters of the extractor, to be written in the __str__() method
   ):
     # Each class needs to have a constructor taking
     # all the parameters that are required for the feature extraction as arguments
     self.requires_training = requires_training
     self.split_training_data_by_client = split_training_data_by_client
+    self.min_extractor_file_size = min_extractor_file_size
+    self.min_feature_file_size = min_feature_file_size
 
     self._kwargs = kwargs
diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py
index 1ee0e4845851aa75c2e3753304f8971673f8ff87..ab3bb1c6fff0fc97aa8035f324e6b46eeed33993 100644
--- a/bob/bio/base/preprocessor/Preprocessor.py
+++ b/bob/bio/base/preprocessor/Preprocessor.py
@@ -19,17 +19,24 @@ class Preprocessor:
     This function is used to read the original data from file.
     It takes three inputs: A :py:class:`bob.bio.base.database.BioFile` (or one of its derivatives), the original directory (as ``str``) and the original extension (as ``str``).
 
+  min_preprocessed_file_size : int
+    The minimum file size of saved preprocessed data in bytes. If the saved
+    preprocessed data file size is smaller than this, it is assumed to be a
+    corrupt file and the data will be processed again.
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the `__str__` function.
   """
 
-  def __init__(self, writes_data=True, read_original_data=None, **kwargs):
+  def __init__(self, writes_data=True, read_original_data=None,
+               min_preprocessed_file_size=1000, **kwargs):
     # Each class needs to have a constructor taking
     # all the parameters that are required for the preprocessing as arguments
     self.writes_data = writes_data
     if read_original_data is None:
       read_original_data = utils.read_original_data
     self.read_original_data = read_original_data
+    self.min_preprocessed_file_size = min_preprocessed_file_size
     self._kwargs = kwargs
 
     pass
diff --git a/bob/bio/base/tools/algorithm.py b/bob/bio/base/tools/algorithm.py
index 97740ca143efd46b04ee0fc10cf2604a713ecf1a..98486e9da03c3516cabba71bf42e57b2025ec1f6 100644
--- a/bob/bio/base/tools/algorithm.py
+++ b/bob/bio/base/tools/algorithm.py
@@ -38,7 +38,8 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
   # the file selector object
   fs = FileSelector.instance()
 
-  if utils.check_file(fs.projector_file, force, 1000):
+  if utils.check_file(fs.projector_file, force,
+                      algorithm.min_projector_file_size):
     logger.info("- Projection: projector '%s' already exists.", fs.projector_file)
   else:
     bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
@@ -120,7 +121,8 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
         logger.error("Cannot find extracted feature file %s", feature_file)
 
-    if not utils.check_file(projected_file, force, 1000):
+    if not utils.check_file(projected_file, force,
+                            algorithm.min_projected_file_size):
       logger.debug("... Projecting features for file '%s'", feature_file)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
       bob.io.base.create_directories_safe(os.path.dirname(projected_file))
@@ -175,7 +177,8 @@ def train_enroller(algorithm, extractor, allow_missing_files = False, force = Fa
   # the file selector object
   fs = FileSelector.instance()
 
-  if utils.check_file(fs.enroller_file, force, 1000):
+  if utils.check_file(fs.enroller_file, force,
+                      algorithm.min_enroller_file_size):
     logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file)
   else:
     # define the tool that is required to read the features
@@ -258,7 +261,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
       model_file = fs.model_file(model_id, group)
 
       # Removes old file if required
-      if not utils.check_file(model_file, force, 1000):
+      if not utils.check_file(model_file, force,
+                              algorithm.min_model_file_size):
         enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
 
         if allow_missing_files:
@@ -305,7 +309,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
       t_model_file = fs.t_model_file(t_model_id, group)
 
       # Removes old file if required
-      if not utils.check_file(t_model_file, force, 1000):
+      if not utils.check_file(t_model_file, force,
+                              algorithm.min_t_model_file_size):
         t_enroll_files = fs.t_enroll_files(t_model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
 
         if allow_missing_files:
diff --git a/bob/bio/base/tools/extractor.py b/bob/bio/base/tools/extractor.py
index a28016af45925dce79f2cdf01a93f34262b2f57e..4940ec4d705767a26f8bf154583c9735333cfa00 100644
--- a/bob/bio/base/tools/extractor.py
+++ b/bob/bio/base/tools/extractor.py
@@ -38,7 +38,8 @@ def train_extractor(extractor, preprocessor, allow_missing_files = False, force
   # the file selector object
   fs = FileSelector.instance()
   # the file to write
-  if utils.check_file(fs.extractor_file, force, 1000):
+  if utils.check_file(fs.extractor_file, force,
+                      extractor.min_extractor_file_size):
     logger.info("- Extraction: extractor '%s' already exists.", fs.extractor_file)
   else:
     bob.io.base.create_directories_safe(os.path.dirname(fs.extractor_file))
@@ -109,7 +110,8 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_
       else:
         logger.error("Cannot find preprocessed data file %s", data_file)
 
-    if not utils.check_file(feature_file, force, 1000):
+    if not utils.check_file(feature_file, force,
+                            extractor.min_feature_file_size):
       logger.debug("... Extracting features for data file '%s'", data_file)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
       bob.io.base.create_directories_safe(os.path.dirname(feature_file))
diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py
index 7916725d117d238344c956c1fb904d5123cc3bca..6b81f2a1146c648db0eb0f9f478cf4d8a40bedaa 100644
--- a/bob/bio/base/tools/preprocessor.py
+++ b/bob/bio/base/tools/preprocessor.py
@@ -65,7 +65,8 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
     file_name = file_object.make_path(original_directory, original_extension)
 
     # check for existence
-    if not utils.check_file(preprocessed_data_file, force, 1000):
+    if not utils.check_file(preprocessed_data_file, force,
+                            preprocessor.min_preprocessed_file_size):
       logger.debug("... Processing original data file '%s'", file_name)
      data = preprocessor.read_original_data(file_object, original_directory, original_extension)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
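
For a sense of how these thresholds are consumed, here is a minimal sketch of the helper that every call site above now parameterizes. This is an assumed reading of `bob.bio.base.utils.check_file` inferred from the call sites, not a copy of its implementation: a file is reused only when it exists, is at least `expected_file_size` bytes long, and `force` is not set; otherwise it is treated as missing or corrupt and will be regenerated.

import os

def check_file(filename, force, expected_file_size=1):
  """Assumed semantics of the reuse-or-recompute check used above.

  Returns True when ``filename`` can be reused as-is; removes the file and
  returns False when ``force`` is set or the file is smaller than
  ``expected_file_size`` bytes (i.e. presumed truncated or corrupt).
  """
  if os.path.exists(filename):
    if force or os.path.getsize(filename) < expected_file_size:
      os.remove(filename)  # stale or too small: drop it so it is regenerated
      return False
    return True
  return False

With the hard-coded 1000 replaced by these attributes, a tool whose artifacts are legitimately small can lower its threshold from the constructor, e.g. a derived extractor passing `min_feature_file_size=100` to `Extractor.__init__` so that short feature vectors are not misclassified as corrupt files.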