Commit 1d88685f authored by Manuel Günther's avatar Manuel Günther Committed by Tiago de Freitas Pereira
Browse files

First version of proposed handling of original data

parent 10f2493c
......@@ -14,7 +14,8 @@ class Filename (Preprocessor):
"""
def __init__(self):
# NOTE(review): this span is a rendered diff hunk with the +/- markers
# stripped — the next two base-constructor calls are the removed (old)
# and added (new) version of the SAME line; in the real file only one
# of them exists (the second, which installs a no-op ``load_function``).
Preprocessor.__init__(self, writes_data=False)
# call base class constructor, using a custom ``load_function`` that does nothing and always returns None
Preprocessor.__init__(self, writes_data=False, load_function = lambda x,y,z: None)
# The call function (i.e. the operator() in C++ terms)
......@@ -40,28 +41,6 @@ class Filename (Preprocessor):
return 1
############################################################
### Special functions that might be overwritten on need
############################################################
def read_original_data(self, original_file_name):
  """read_original_data(original_file_name) -> data

  Deliberately does **not** read the original image: the ``Filename``
  preprocessor only works with file names, never with image data.

  **Parameters:**

  ``original_file_name`` : any
    Ignored.

  **Returns:**

  ``data`` : ``None``
    Always ``None``.
  """
  return None
def write_data(self, data, data_file):
"""Does **not** write any data.
......
......@@ -4,6 +4,7 @@
# @date: Tue Oct 2 12:12:39 CEST 2012
import bob.io.base
import bob.db.base
import os
......@@ -18,14 +19,19 @@ class Preprocessor:
writes_data : bool
Select, if the preprocessor actually writes preprocessed images, or if it is simply returning values.
read_original_data: callable
This function is used to read the original data from file.
It takes three inputs: A :py:class:`bob.bio.db.BioFile`, the original directory (as ``str``) and the original extension (as ``str``).
kwargs : ``key=value`` pairs
A list of keyword arguments to be written in the :py:meth:`__str__` function.
"""
# NOTE(review): rendered diff hunk — the two consecutive ``def`` lines are
# the removed (old) and added (new) signature of the SAME constructor; in
# the real file only the second one (with ``load_function``) exists.
def __init__(self, writes_data = True, **kwargs):
def __init__(self, writes_data = True, load_function = bob.db.base.File.load, **kwargs):
# Each class needs to have a constructor taking
# all the parameters that are required for the preprocessing as arguments
self.writes_data = writes_data
# the supplied loader becomes the instance's read_original_data hook
self.read_original_data = load_function
self._kwargs = kwargs
pass
......@@ -70,25 +76,6 @@ class Preprocessor:
### Special functions that might be overwritten on need
############################################################
def read_original_data(self, original_file_name):
  """read_original_data(original_file_name) -> data

  Loads the *original* data (usually something like an image) from file,
  delegating to :py:func:`bob.io.base.load`.
  Overwrite this function if your data is stored in a different format.

  **Parameters:**

  original_file_name : str
    The file name to read the original data from.

  **Returns:**

  data : object (usually :py:class:`numpy.ndarray`)
    The original data read from file.
  """
  # plain delegation — no caching, no validation
  loader = bob.io.base.load
  return loader(original_file_name)
def write_data(self, data, data_file):
"""Writes the given *preprocessed* data to a file with the given name.
......
......@@ -6,6 +6,7 @@ import bob.core
logger = bob.core.log.setup("bob.bio.base")
import bob.bio.base
import bob.bio.db
import bob.db.base
import numpy
......@@ -46,7 +47,7 @@ def main(command_line_parameters=None):
preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor")
logger.debug("Loading input data from file '%s'%s", args.input_file, " and '%s'" % args.annotation_file if args.annotation_file is not None else "")
data = preprocessor.read_original_data(args.input_file)
data = preprocessor.read_original_data(bob.bio.db.BioFile(1, args.input_file, 2), "", "")
annotations = bob.db.base.annotations.read_annotation_file(args.annotation_file, 'named') if args.annotation_file is not None else None
logger.info("Preprocessing data")
......
from bob.bio.db import ZTBioDatabase
from bob.bio.db import ZTBioDatabase, AtntBioDatabase
from bob.bio.base.test.utils import atnt_database_directory
......@@ -14,14 +14,13 @@ class DummyDatabase(ZTBioDatabase):
training_depends_on_protocol=False,
models_depend_on_protocol=False
)
import bob.db.atnt
self.__db = bob.db.atnt.Database()
self.__db = AtntBioDatabase()
def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
# Delegates model-id lookup to the wrapped AT&T database.
# NOTE(review): rendered diff hunk — the first ``return`` is the removed
# (old) line and the second is the added (new) line; in the real file
# only one of them exists.
return self.__db.model_ids(groups, protocol)
return self.__db.model_ids_with_protocol(groups, protocol)
def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
# Delegates the file-object query to the wrapped database.
# NOTE(review): rendered diff hunk — the first ``return`` is the removed
# (old) line (old argument order) and the second is the added (new) line;
# only one of them exists in the real file.
return self.__db.objects(model_ids, groups, purposes, protocol, **kwargs)
return self.__db.objects(groups, protocol, purposes, model_ids, **kwargs)
def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
  """T-norm objects are not provided by this dummy database; the result is always empty."""
  return list()
......@@ -30,7 +29,7 @@ class DummyDatabase(ZTBioDatabase):
return []
def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
# T-norm model ids are taken from the wrapped database's regular models.
# NOTE(review): rendered diff hunk — the first ``return`` is the removed
# (old) line and the second is the added (new) line; only one of them
# exists in the real file.
return self.__db.model_ids(groups)
return self.__db.model_ids_with_protocol(groups, protocol)
def t_enroll_files(self, t_model_id, group='dev'):
  """T-norm enrollment simply reuses the regular enrollment files for the given model."""
  files = self.enroll_files(t_model_id, group)
  return files
......
import bob.bio.base
import bob.bio.db
from . import utils
......@@ -9,7 +10,7 @@ def test_filename():
assert isinstance(preprocessor, bob.bio.base.preprocessor.Filename)
# try to load the original image
assert preprocessor.read_original_data("/any/path") is None
assert preprocessor.read_original_data(bob.bio.db.BioFile(1,"2",3), "/any/path", ".any.extension") is None
# try to process
assert preprocessor(None, None) == 1
......
......@@ -110,6 +110,10 @@ class FileSelector:
"""Returns the list of original data that can be used for preprocessing."""
return self.database.original_file_names(self.database.all_files(groups=groups))
def original_data_list_files(self, groups = None):
  """Returns the original file objects together with the directory and extension needed to locate them on disk."""
  db = self.database
  selected = db.all_files(groups=groups)
  return (selected, db.original_directory, db.original_extension)
def annotation_list(self, groups = None):
  """Returns the file objects whose annotations should be read (all files of the given groups)."""
  selected = self.database.all_files(groups=groups)
  return selected
......
......@@ -42,7 +42,7 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
fs = FileSelector.instance()
# get the file lists
data_files = fs.original_data_list(groups=groups)
data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups)
preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
# select a subset of keys to iterate
......@@ -60,12 +60,13 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
# iterate over the selected files
for i in index_range:
preprocessed_data_file = preprocessed_data_files[i]
file_name = data_files[i]
file_object = data_files[i]
file_name = file_object.make_path(original_directory, original_extension)
# check for existence
if not utils.check_file(preprocessed_data_file, force, 1000):
logger.debug("... Processing original data file '%s'", file_name)
data = preprocessor.read_original_data(file_name)
data = preprocessor.read_original_data(file_object, original_directory, original_extension)
# create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment