Commit 1d88685f authored by Manuel Günther, committed by Tiago de Freitas Pereira
Browse files

First version of proposed handling of original data

parent 10f2493c
...@@ -14,7 +14,8 @@ class Filename (Preprocessor): ...@@ -14,7 +14,8 @@ class Filename (Preprocessor):
""" """
def __init__(self): def __init__(self):
Preprocessor.__init__(self, writes_data=False) # call base class constructor, using a custom ``load_function`` that does nothing and always returns None
Preprocessor.__init__(self, writes_data=False, load_function = lambda x,y,z: None)
# The call function (i.e. the operator() in C++ terms) # The call function (i.e. the operator() in C++ terms)
...@@ -40,28 +41,6 @@ class Filename (Preprocessor): ...@@ -40,28 +41,6 @@ class Filename (Preprocessor):
return 1 return 1
############################################################
### Special functions that might be overwritten on need
############################################################
def read_original_data(self, original_file_name):
"""read_original_data(original_file_name) -> data
This function does **not** read the original image.
**Parameters:**
``original_file_name`` : any
ignored
**Returns:**
``data`` : ``None``
throughout.
"""
pass
def write_data(self, data, data_file): def write_data(self, data, data_file):
"""Does **not** write any data. """Does **not** write any data.
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# @date: Tue Oct 2 12:12:39 CEST 2012 # @date: Tue Oct 2 12:12:39 CEST 2012
import bob.io.base import bob.io.base
import bob.db.base
import os import os
...@@ -18,14 +19,19 @@ class Preprocessor: ...@@ -18,14 +19,19 @@ class Preprocessor:
writes_data : bool writes_data : bool
Select, if the preprocessor actually writes preprocessed images, or if it is simply returning values. Select, if the preprocessor actually writes preprocessed images, or if it is simply returning values.
load_function : callable
This function is used to read the original data from file; it is stored as ``read_original_data``.
It takes three inputs: a :py:class:`bob.bio.db.BioFile`, the original directory (as ``str``), and the original extension (as ``str``).
kwargs : ``key=value`` pairs kwargs : ``key=value`` pairs
A list of keyword arguments to be written in the :py:meth:`__str__` function. A list of keyword arguments to be written in the :py:meth:`__str__` function.
""" """
def __init__(self, writes_data = True, **kwargs): def __init__(self, writes_data = True, load_function = bob.db.base.File.load, **kwargs):
# Each class needs to have a constructor taking # Each class needs to have a constructor taking
# all the parameters that are required for the preprocessing as arguments # all the parameters that are required for the preprocessing as arguments
self.writes_data = writes_data self.writes_data = writes_data
self.read_original_data = load_function
self._kwargs = kwargs self._kwargs = kwargs
pass pass
...@@ -70,25 +76,6 @@ class Preprocessor: ...@@ -70,25 +76,6 @@ class Preprocessor:
### Special functions that might be overwritten on need ### Special functions that might be overwritten on need
############################################################ ############################################################
def read_original_data(self, original_file_name):
"""read_original_data(original_file_name) -> data
Reads the *original* data (usually something like an image) from file.
In this base class implementation, it uses :py:func:`bob.io.base.load` to do that.
If you have a different format, please override this function.
**Parameters:**
original_file_name : str
The file name to read the original data from.
**Returns:**
data : object (usually :py:class:`numpy.ndarray`)
The original data read from file.
"""
return bob.io.base.load(original_file_name)
def write_data(self, data, data_file): def write_data(self, data, data_file):
"""Writes the given *preprocessed* data to a file with the given name. """Writes the given *preprocessed* data to a file with the given name.
......
...@@ -6,6 +6,7 @@ import bob.core ...@@ -6,6 +6,7 @@ import bob.core
logger = bob.core.log.setup("bob.bio.base") logger = bob.core.log.setup("bob.bio.base")
import bob.bio.base import bob.bio.base
import bob.bio.db
import bob.db.base import bob.db.base
import numpy import numpy
...@@ -46,7 +47,7 @@ def main(command_line_parameters=None): ...@@ -46,7 +47,7 @@ def main(command_line_parameters=None):
preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor") preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor")
logger.debug("Loading input data from file '%s'%s", args.input_file, " and '%s'" % args.annotation_file if args.annotation_file is not None else "") logger.debug("Loading input data from file '%s'%s", args.input_file, " and '%s'" % args.annotation_file if args.annotation_file is not None else "")
data = preprocessor.read_original_data(args.input_file) data = preprocessor.read_original_data(bob.bio.db.BioFile(1, args.input_file, 2), "", "")
annotations = bob.db.base.annotations.read_annotation_file(args.annotation_file, 'named') if args.annotation_file is not None else None annotations = bob.db.base.annotations.read_annotation_file(args.annotation_file, 'named') if args.annotation_file is not None else None
logger.info("Preprocessing data") logger.info("Preprocessing data")
......
from bob.bio.db import ZTBioDatabase from bob.bio.db import ZTBioDatabase, AtntBioDatabase
from bob.bio.base.test.utils import atnt_database_directory from bob.bio.base.test.utils import atnt_database_directory
...@@ -14,14 +14,13 @@ class DummyDatabase(ZTBioDatabase): ...@@ -14,14 +14,13 @@ class DummyDatabase(ZTBioDatabase):
training_depends_on_protocol=False, training_depends_on_protocol=False,
models_depend_on_protocol=False models_depend_on_protocol=False
) )
import bob.db.atnt self.__db = AtntBioDatabase()
self.__db = bob.db.atnt.Database()
def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs): def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
return self.__db.model_ids(groups, protocol) return self.__db.model_ids_with_protocol(groups, protocol)
def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
return self.__db.objects(model_ids, groups, purposes, protocol, **kwargs) return self.__db.objects(groups, protocol, purposes, model_ids, **kwargs)
def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs): def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
return [] return []
...@@ -30,7 +29,7 @@ class DummyDatabase(ZTBioDatabase): ...@@ -30,7 +29,7 @@ class DummyDatabase(ZTBioDatabase):
return [] return []
def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs): def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
return self.__db.model_ids(groups) return self.__db.model_ids_with_protocol(groups, protocol)
def t_enroll_files(self, t_model_id, group='dev'): def t_enroll_files(self, t_model_id, group='dev'):
return self.enroll_files(t_model_id, group) return self.enroll_files(t_model_id, group)
......
import bob.bio.base import bob.bio.base
import bob.bio.db
from . import utils from . import utils
...@@ -9,7 +10,7 @@ def test_filename(): ...@@ -9,7 +10,7 @@ def test_filename():
assert isinstance(preprocessor, bob.bio.base.preprocessor.Filename) assert isinstance(preprocessor, bob.bio.base.preprocessor.Filename)
# try to load the original image # try to load the original image
assert preprocessor.read_original_data("/any/path") is None assert preprocessor.read_original_data(bob.bio.db.BioFile(1,"2",3), "/any/path", ".any.extension") is None
# try to process # try to process
assert preprocessor(None, None) == 1 assert preprocessor(None, None) == 1
......
...@@ -110,6 +110,10 @@ class FileSelector: ...@@ -110,6 +110,10 @@ class FileSelector:
"""Returns the list of original data that can be used for preprocessing.""" """Returns the list of original data that can be used for preprocessing."""
return self.database.original_file_names(self.database.all_files(groups=groups)) return self.database.original_file_names(self.database.all_files(groups=groups))
def original_data_list_files(self, groups = None):
"""Returns a tuple of the list of original files (as returned by ``all_files``), the original directory and the original extension of the database."""
return (self.database.all_files(groups=groups), self.database.original_directory, self.database.original_extension)
def annotation_list(self, groups = None): def annotation_list(self, groups = None):
"""Returns the list of annotations objects.""" """Returns the list of annotations objects."""
return self.database.all_files(groups=groups) return self.database.all_files(groups=groups)
......
...@@ -42,7 +42,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files ...@@ -42,7 +42,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
fs = FileSelector.instance() fs = FileSelector.instance()
# get the file lists # get the file lists
data_files = fs.original_data_list(groups=groups) data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups)
preprocessed_data_files = fs.preprocessed_data_list(groups=groups) preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
# select a subset of keys to iterate # select a subset of keys to iterate
...@@ -60,12 +60,13 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files ...@@ -60,12 +60,13 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
# iterate over the selected files # iterate over the selected files
for i in index_range: for i in index_range:
preprocessed_data_file = preprocessed_data_files[i] preprocessed_data_file = preprocessed_data_files[i]
file_name = data_files[i] file_object = data_files[i]
file_name = file_object.make_path(original_directory, original_extension)
# check for existence # check for existence
if not utils.check_file(preprocessed_data_file, force, 1000): if not utils.check_file(preprocessed_data_file, force, 1000):
logger.debug("... Processing original data file '%s'", file_name) logger.debug("... Processing original data file '%s'", file_name)
data = preprocessor.read_original_data(file_name) data = preprocessor.read_original_data(file_object, original_directory, original_extension)
# create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere) # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file)) bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment