diff --git a/bob/bio/base/preprocessor/Filename.py b/bob/bio/base/preprocessor/Filename.py
index 06db452fe7b5d051d3f949a86fad7eff88a07c56..a78af095cee1ca411107283dc657d62ab0c88088 100644
--- a/bob/bio/base/preprocessor/Filename.py
+++ b/bob/bio/base/preprocessor/Filename.py
@@ -14,7 +14,8 @@ class Filename (Preprocessor):
   """
 
   def __init__(self):
-    Preprocessor.__init__(self, writes_data=False)
+    # call base class constructor, using a custom ``load_function`` that does nothing and always returns None
+    Preprocessor.__init__(self, writes_data=False, load_function = lambda x,y,z: None)
 
 
   # The call function (i.e. the operator() in C++ terms)
@@ -40,28 +41,6 @@ class Filename (Preprocessor):
     return 1
 
 
-  ############################################################
-  ### Special functions that might be overwritten on need
-  ############################################################
-
-  def read_original_data(self, original_file_name):
-    """read_original_data(original_file_name) -> data
-
-    This function does **not** read the original image..
-
-    **Parameters:**
-
-    ``original_file_name`` : any
-      ignored
-
-    **Returns:**
-
-    ``data`` : ``None``
-      throughout.
-    """
-    pass
-
-
   def write_data(self, data, data_file):
     """Does **not** write any data.
 
diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py
index 268a7b7a16f2fb67864a98f526eca7e64f78f4ca..3534cdde33f0ff47015d191514fad04a45fc1f01 100644
--- a/bob/bio/base/preprocessor/Preprocessor.py
+++ b/bob/bio/base/preprocessor/Preprocessor.py
@@ -4,6 +4,7 @@
 # @date: Tue Oct 2 12:12:39 CEST 2012
 
 import bob.io.base
+import bob.db.base
 
 import os
 
@@ -18,14 +19,19 @@ class Preprocessor:
   writes_data : bool
     Select, if the preprocessor actually writes preprocessed images, or if it is simply returning values.
 
+  load_function : callable
+    This function is used to read the original data from file; it is stored in the ``read_original_data`` attribute of this class.
+    It takes three inputs: A :py:class:`bob.bio.db.BioFile`, the original directory (as ``str``) and the original extension (as ``str``).
+
   kwargs : ``key=value`` pairs
     A list of keyword arguments to be written in the :py:meth:`__str__` function.
   """
 
-  def __init__(self, writes_data = True, **kwargs):
+  def __init__(self, writes_data = True, load_function = bob.db.base.File.load, **kwargs):
     # Each class needs to have a constructor taking
     # all the parameters that are required for the preprocessing as arguments
     self.writes_data = writes_data
+    self.read_original_data = load_function
     self._kwargs = kwargs
     pass
 
@@ -70,25 +76,6 @@ class Preprocessor:
   ### Special functions that might be overwritten on need
   ############################################################
 
-  def read_original_data(self, original_file_name):
-    """read_original_data(original_file_name) -> data
-
-    Reads the *original* data (usually something like an image) from file.
-    In this base class implementation, it uses :py:func:`bob.io.base.load` to do that.
-    If you have different format, please overwrite this function.
-
-    **Parameters:**
-
-    original_file_name : str
-      The file name to read the original data from.
-
-    **Returns:**
-
-    data : object (usually :py:class:`numpy.ndarray`)
-      The original data read from file.
-    """
-    return bob.io.base.load(original_file_name)
-
 
   def write_data(self, data, data_file):
     """Writes the given *preprocessed* data to a file with the given name.
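
Note (illustration, not part of the patch): with the two hunks above, ``read_original_data`` is no longer an overridable method but a callable injected through the new ``load_function`` constructor argument and stored as an attribute. A minimal sketch of how a derived preprocessor can use the hook; ``CustomPreprocessor``, ``load_with_make_path`` and the sample values are invented for the example, everything else mirrors the hunks above:

  import bob.bio.base
  import bob.bio.db
  import bob.io.base


  def load_with_make_path(biofile, original_directory, original_extension):
    # resolve the full path of the original sample and load it with bob.io.base
    return bob.io.base.load(biofile.make_path(original_directory, original_extension))


  class CustomPreprocessor (bob.bio.base.preprocessor.Preprocessor):

    def __init__(self):
      # inject the reader instead of overriding a read_original_data() method
      bob.bio.base.preprocessor.Preprocessor.__init__(self, load_function = load_with_make_path)

    def __call__(self, data, annotations = None):
      return data


  preprocessor = CustomPreprocessor()
  # same calling convention as in bob/bio/base/tools/preprocessor.py below: (BioFile, original directory, original extension)
  sample = bob.bio.db.BioFile(1, "s1/1", 1)
  data = preprocessor.read_original_data(sample, "/path/to/atnt/images", ".pgm")

With the default ``load_function = bob.db.base.File.load``, calling ``preprocessor.read_original_data(f, d, e)`` should be equivalent to ``f.load(d, e)`` for a file object ``f`` derived from :py:class:`bob.db.base.File`.
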
diff --git a/bob/bio/base/script/preprocess.py b/bob/bio/base/script/preprocess.py
index 1014196ac414a4cb7307fdbc1db5e1171cc95de9..db58f297bf89d4a034be1400da833af94dcd5d88 100644
--- a/bob/bio/base/script/preprocess.py
+++ b/bob/bio/base/script/preprocess.py
@@ -6,6 +6,7 @@ import bob.core
 logger = bob.core.log.setup("bob.bio.base")
 
 import bob.bio.base
+import bob.bio.db
 import bob.db.base
 
 import numpy
@@ -46,7 +47,7 @@ def main(command_line_parameters=None):
   preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor")
 
   logger.debug("Loading input data from file '%s'%s", args.input_file, " and '%s'" % args.annotation_file if args.annotation_file is not None else "")
-  data = preprocessor.read_original_data(args.input_file)
+  data = preprocessor.read_original_data(bob.bio.db.BioFile(1, args.input_file, 2), "", "")
   annotations = bob.db.base.annotations.read_annotation_file(args.annotation_file, 'named') if args.annotation_file is not None else None
 
   logger.info("Preprocessing data")
diff --git a/bob/bio/base/test/dummy/database.py b/bob/bio/base/test/dummy/database.py
index a44e0ec2fc2f2184d91735e195153f739ac1876e..f8061c6f4c9ec21e5ca2d84bf1a02489e851c04c 100644
--- a/bob/bio/base/test/dummy/database.py
+++ b/bob/bio/base/test/dummy/database.py
@@ -1,4 +1,4 @@
-from bob.bio.db import ZTBioDatabase
+from bob.bio.db import ZTBioDatabase, AtntBioDatabase
 
 from bob.bio.base.test.utils import atnt_database_directory
 
@@ -14,14 +14,13 @@ class DummyDatabase(ZTBioDatabase):
             training_depends_on_protocol=False,
             models_depend_on_protocol=False
         )
-        import bob.db.atnt
-        self.__db = bob.db.atnt.Database()
+        self.__db = AtntBioDatabase()
 
     def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
-        return self.__db.model_ids(groups, protocol)
+        return self.__db.model_ids_with_protocol(groups, protocol)
 
     def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
-        return self.__db.objects(model_ids, groups, purposes, protocol, **kwargs)
+        return self.__db.objects(groups, protocol, purposes, model_ids, **kwargs)
 
     def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
         return []
@@ -30,7 +29,7 @@ class DummyDatabase(ZTBioDatabase):
         return []
 
     def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
-        return self.__db.model_ids(groups)
+        return self.__db.model_ids_with_protocol(groups, protocol)
 
     def t_enroll_files(self, t_model_id, group='dev'):
         return self.enroll_files(t_model_id, group)
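
Note (illustration, not part of the patch): the dummy database above now delegates to the high-level ``AtntBioDatabase`` wrapper, whose query methods take their arguments in a different order than the low-level ``bob.db.atnt.Database`` (``groups, protocol, purposes, model_ids`` instead of ``model_ids, groups, purposes, protocol``). A hypothetical interactive check of that order; the ``'dev'`` group and the ``'Default'`` protocol name are assumptions about the underlying AT&T database:

  from bob.bio.db import AtntBioDatabase

  db = AtntBioDatabase()
  # argument order of the high-level interface: groups, protocol, purposes, model_ids
  probes = db.objects('dev', 'Default', 'probe', None)
  model_ids = db.model_ids_with_protocol('dev', 'Default')
  print(len(probes), len(model_ids))
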
diff --git a/bob/bio/base/test/test_preprocessor.py b/bob/bio/base/test/test_preprocessor.py
index bd78b60d25e4cd716cb8e67e0d2445a22c98480c..577f73ad8da15fa6463090c5fb0db17088539b88 100644
--- a/bob/bio/base/test/test_preprocessor.py
+++ b/bob/bio/base/test/test_preprocessor.py
@@ -1,4 +1,5 @@
 import bob.bio.base
+import bob.bio.db
 
 from . import utils
 
@@ -9,7 +10,7 @@ def test_filename():
   assert isinstance(preprocessor, bob.bio.base.preprocessor.Filename)
 
   # try to load the original image
-  assert preprocessor.read_original_data("/any/path") is None
+  assert preprocessor.read_original_data(bob.bio.db.BioFile(1,"2",3), "/any/path", ".any.extension") is None
 
   # try to process
   assert preprocessor(None, None) == 1
diff --git a/bob/bio/base/tools/FileSelector.py b/bob/bio/base/tools/FileSelector.py
index 8a623d3efb7dea8e860859fa2b251325102dec1f..16592e26b1fd04593eb3d84e442daf3260ff93ba 100644
--- a/bob/bio/base/tools/FileSelector.py
+++ b/bob/bio/base/tools/FileSelector.py
@@ -110,6 +110,10 @@ class FileSelector:
     """Returns the list of original data that can be used for preprocessing."""
     return self.database.original_file_names(self.database.all_files(groups=groups))
 
+  def original_data_list_files(self, groups = None):
+    """Returns the list of original ``BioFile`` objects, together with the original directory and original extension, which can be used for preprocessing."""
+    return (self.database.all_files(groups=groups), self.database.original_directory, self.database.original_extension)
+
   def annotation_list(self, groups = None):
     """Returns the list of annotations objects."""
     return self.database.all_files(groups=groups)
diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py
index 01dea478a7976255216d159ba4d1653d9c1e05b5..18cd9882a57924a472de60f3759e10f22335d4fb 100644
--- a/bob/bio/base/tools/preprocessor.py
+++ b/bob/bio/base/tools/preprocessor.py
@@ -42,7 +42,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
   fs = FileSelector.instance()
 
   # get the file lists
-  data_files = fs.original_data_list(groups=groups)
+  data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups)
   preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
 
   # select a subset of keys to iterate
@@ -60,12 +60,13 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
   # iterate over the selected files
   for i in index_range:
     preprocessed_data_file = preprocessed_data_files[i]
-    file_name = data_files[i]
+    file_object = data_files[i]
+    file_name = file_object.make_path(original_directory, original_extension)
 
     # check for existence
     if not utils.check_file(preprocessed_data_file, force, 1000):
       logger.debug("... Processing original data file '%s'", file_name)
-      data = preprocessor.read_original_data(file_name)
+      data = preprocessor.read_original_data(file_object, original_directory, original_extension)
 
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
       bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
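
Note (illustration, not part of the patch): the new ``FileSelector.original_data_list_files`` returns the :py:class:`bob.bio.db.BioFile` objects together with the database's original directory and extension, so that ``preprocess`` can hand all three to ``read_original_data``. A sketch of the same consumption pattern outside the tool chain; ``read_all_originals`` is a made-up helper and assumes the ``FileSelector`` singleton has already been configured (as ``verify.py`` does):

  from bob.bio.base.tools import FileSelector


  def read_all_originals(preprocessor, groups = ('dev',)):
    # get the BioFile list plus the information needed to resolve original file names
    biofiles, original_directory, original_extension = FileSelector.instance().original_data_list_files(groups = list(groups))
    for biofile in biofiles:
      # the full path is only needed for logging; reading goes through the injected hook
      file_name = biofile.make_path(original_directory, original_extension)
      yield file_name, preprocessor.read_original_data(biofile, original_directory, original_extension)
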