Commit 90f3840f authored by Amir Mohammadi

You can now specify a minimum file size for saved files

parent 6f3f53fa
1 merge request: !58 You can now specify a minimum file size for saved files
@@ -46,6 +46,29 @@ class Algorithm:
     The way scores are fused when multiple probes are available.
     See :py:func:`bob.bio.base.score_fusion_strategy` for possible values.
 
+  min_projector_file_size : int
+    The minimum file size of projector_file in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and it will be generated
+    again.
+
+  min_projected_file_size : int
+    The minimum file size of projected_file in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and it will be generated
+    again.
+
+  min_enroller_file_size : int
+    The minimum file size of enroller_file in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and it will be generated
+    again.
+
+  min_model_file_size : int
+    The minimum file size of model_file in bytes. If the saved file is smaller
+    than this, it is assumed to be corrupt and it will be generated again.
+
+  min_t_model_file_size : int
+    The minimum file size of t_model_file in bytes. If the saved file is smaller
+    than this, it is assumed to be corrupt and it will be generated again.
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the `__str__` function.
@@ -61,6 +84,11 @@ class Algorithm:
       multiple_model_scoring = 'average',  # by default, compute the average between several models and the probe
       multiple_probe_scoring = 'average',  # by default, compute the average between the model and several probes
+      min_projector_file_size=1000,
+      min_projected_file_size=1000,
+      min_enroller_file_size=1000,
+      min_model_file_size=1000,
+      min_t_model_file_size=1000,
       **kwargs  # parameters from the derived class that should be reported in the __str__() function
   ):
       self.performs_projection = performs_projection
@@ -70,6 +98,11 @@ class Algorithm:
       self.requires_enroller_training = requires_enroller_training
       self.model_fusion_function = utils.score_fusion_strategy(multiple_model_scoring)
       self.probe_fusion_function = utils.score_fusion_strategy(multiple_probe_scoring)
+      self.min_projector_file_size = min_projector_file_size
+      self.min_projected_file_size = min_projected_file_size
+      self.min_enroller_file_size = min_enroller_file_size
+      self.min_model_file_size = min_model_file_size
+      self.min_t_model_file_size = min_t_model_file_size
       self._kwargs = kwargs
       self._kwargs.update({'multiple_model_scoring': multiple_model_scoring, 'multiple_probe_scoring': multiple_probe_scoring})
......
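With this change, any Algorithm subclass can tune these thresholds through the base-class constructor. A minimal sketch, assuming a hypothetical subclass (only the min_*_file_size keywords and performs_projection come from the code above; the class name and the chosen sizes are illustrative):

    from bob.bio.base.algorithm import Algorithm

    class MyAlgorithm(Algorithm):
        """Hypothetical algorithm whose serialized projector is known to be large."""

        def __init__(self, **kwargs):
            super(MyAlgorithm, self).__init__(
                performs_projection=True,
                # a saved projector smaller than 4 kB is treated as corrupt
                # and will be trained again on the next run
                min_projector_file_size=4096,
                # projected feature files are small; a looser bound suffices
                min_projected_file_size=500,
                **kwargs)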
@@ -23,6 +23,16 @@ class Extractor:
     Set this flag to ``True`` if your feature extractor requires the training data to be split by clients.
     Ignored, if ``requires_training`` is ``False``
 
+  min_extractor_file_size : int
+    The minimum file size, in bytes, of a saved extractor file for extractors
+    that require training. If the saved file is smaller than this, it is
+    assumed to be corrupt and the extractor will be trained again.
+
+  min_feature_file_size : int
+    The minimum file size of extracted features in bytes. If the saved file is
+    smaller than this, it is assumed to be corrupt and the features will be
+    extracted again.
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the `__str__` function.
   """
@@ -31,12 +41,16 @@ class Extractor:
       self,
       requires_training = False,  # enable, if your extractor needs training
       split_training_data_by_client = False,  # enable, if your extractor needs the training files sorted by client
+      min_extractor_file_size=1000,
+      min_feature_file_size=1000,
       **kwargs  # the parameters of the extractor, to be written in the __str__() method
   ):
       # Each class needs to have a constructor taking
       # all the parameters that are required for the feature extraction as arguments
       self.requires_training = requires_training
       self.split_training_data_by_client = split_training_data_by_client
+      self.min_extractor_file_size = min_extractor_file_size
+      self.min_feature_file_size = min_feature_file_size
       self._kwargs = kwargs
......
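The extractor gains the same two knobs. A short sketch of a trainable extractor raising its thresholds (the subclass, its __call__ behavior, and the sizes are hypothetical; the keywords are the ones added above):

    import numpy
    from bob.bio.base.extractor import Extractor

    class FlattenExtractor(Extractor):
        """Hypothetical extractor; flattens its input to a 1D feature vector."""

        def __init__(self):
            super(FlattenExtractor, self).__init__(
                requires_training=True,
                min_extractor_file_size=10000,  # trained state below 10 kB => retrain
                min_feature_file_size=1000)     # feature files below 1 kB => re-extract

        def __call__(self, data):
            return numpy.asarray(data).flatten()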
@@ -19,17 +19,24 @@ class Preprocessor:
     This function is used to read the original data from file.
     It takes three inputs: A :py:class:`bob.bio.base.database.BioFile` (or one of its derivatives), the original directory (as ``str``) and the original extension (as ``str``).
 
+  min_preprocessed_file_size : int
+    The minimum file size of saved preprocessed data in bytes. If the saved
+    preprocessed data file is smaller than this, it is assumed to be corrupt
+    and the data will be preprocessed again.
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the `__str__` function.
   """
 
-  def __init__(self, writes_data=True, read_original_data=None, **kwargs):
+  def __init__(self, writes_data=True, read_original_data=None,
+               min_preprocessed_file_size=1000, **kwargs):
     # Each class needs to have a constructor taking
     # all the parameters that are required for the preprocessing as arguments
     self.writes_data = writes_data
     if read_original_data is None:
       read_original_data = utils.read_original_data
     self.read_original_data = read_original_data
+    self.min_preprocessed_file_size = min_preprocessed_file_size
     self._kwargs = kwargs
     pass
......
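Preprocessors get a single threshold, since they only write preprocessed data. Because the keyword is consumed by the base constructor, a subclass only needs to forward it. A sketch with a hypothetical pass-through preprocessor (the __call__ signature is assumed, not taken from this diff):

    from bob.bio.base.preprocessor import Preprocessor

    class PassThrough(Preprocessor):
        """Hypothetical preprocessor that returns its input unchanged."""

        def __init__(self):
            # preprocessed files smaller than 2 kB are recomputed on the next run
            super(PassThrough, self).__init__(min_preprocessed_file_size=2048)

        def __call__(self, data, annotations=None):
            return data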
@@ -38,7 +38,8 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
   # the file selector object
   fs = FileSelector.instance()
 
-  if utils.check_file(fs.projector_file, force, 1000):
+  if utils.check_file(fs.projector_file, force,
+                      algorithm.min_projector_file_size):
     logger.info("- Projection: projector '%s' already exists.", fs.projector_file)
   else:
     bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
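All of these call sites delegate to utils.check_file, whose implementation is not part of this diff. From the way it is used here, its contract appears to be: return True when the target file exists, is at least the given size, and force is not set, so the stage can be skipped; otherwise delete any existing (undersized or force-invalidated) file and return False so the caller regenerates it. A sketch of that contract, not the actual bob.bio.base implementation:

    import os

    def check_file(filename, force, expected_file_size=1):
        """True if `filename` can be reused; otherwise remove it and return False."""
        if os.path.exists(filename):
            if force or os.path.getsize(filename) < expected_file_size:
                # forced recomputation, or a file too small to be valid:
                # drop it so the pipeline writes a fresh one
                os.remove(filename)
                return False
            return True
        return False

For example, a projector file truncated to 10 bytes by a killed job would now fail the 1000-byte default (or any larger per-algorithm bound) and be retrained, instead of silently breaking later stages.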
@@ -120,7 +121,8 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
       logger.error("Cannot find extracted feature file %s", feature_file)
 
-    if not utils.check_file(projected_file, force, 1000):
+    if not utils.check_file(projected_file, force,
+                            algorithm.min_projected_file_size):
       logger.debug("... Projecting features for file '%s'", feature_file)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
       bob.io.base.create_directories_safe(os.path.dirname(projected_file))
@@ -175,7 +177,8 @@ def train_enroller(algorithm, extractor, allow_missing_files = False, force = Fa
   # the file selector object
   fs = FileSelector.instance()
 
-  if utils.check_file(fs.enroller_file, force, 1000):
+  if utils.check_file(fs.enroller_file, force,
+                      algorithm.min_enroller_file_size):
     logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file)
   else:
     # define the tool that is required to read the features
@@ -258,7 +261,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
       model_file = fs.model_file(model_id, group)
 
       # Removes old file if required
-      if not utils.check_file(model_file, force, 1000):
+      if not utils.check_file(model_file, force,
+                              algorithm.min_model_file_size):
         enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
 
         if allow_missing_files:
t_model_file = fs.t_model_file(t_model_id, group)
# Removes old file if required
if not utils.check_file(t_model_file, force, 1000):
if not utils.check_file(t_model_file, force,
algorithm.min_t_model_file_size):
t_enroll_files = fs.t_enroll_files(t_model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
if allow_missing_files:
......
@@ -38,7 +38,8 @@ def train_extractor(extractor, preprocessor, allow_missing_files = False, force
   # the file selector object
   fs = FileSelector.instance()
 
   # the file to write
-  if utils.check_file(fs.extractor_file, force, 1000):
+  if utils.check_file(fs.extractor_file, force,
+                      extractor.min_extractor_file_size):
     logger.info("- Extraction: extractor '%s' already exists.", fs.extractor_file)
   else:
     bob.io.base.create_directories_safe(os.path.dirname(fs.extractor_file))
@@ -109,7 +110,8 @@ def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_
       else:
         logger.error("Cannot find preprocessed data file %s", data_file)
 
-    if not utils.check_file(feature_file, force, 1000):
+    if not utils.check_file(feature_file, force,
+                            extractor.min_feature_file_size):
       logger.debug("... Extracting features for data file '%s'", data_file)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
       bob.io.base.create_directories_safe(os.path.dirname(feature_file))
......
@@ -65,7 +65,8 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
     file_name = file_object.make_path(original_directory, original_extension)
 
     # check for existence
-    if not utils.check_file(preprocessed_data_file, force, 1000):
+    if not utils.check_file(preprocessed_data_file, force,
+                            preprocessor.min_preprocessed_file_size):
       logger.debug("... Processing original data file '%s'", file_name)
       data = preprocessor.read_original_data(file_object, original_directory, original_extension)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
......