Commit 9025548f authored by Pavel KORSHUNOV

support for extractor training

parent 4f3f2734
Pipeline #7281 failed with stages
in 3 minutes and 11 seconds
......@@ -31,15 +31,16 @@ def parse_arguments(command_line_parameters, exclude_resources_from=[]):
# Add sub-tasks that can be executed by this script
parser = parsers['main']
parser.add_argument('--sub-task',
choices=('preprocess', 'extract', 'train-projector', 'project', 'compute-scores'),
choices=('preprocess', 'train-extractor', 'extract', 'train-projector', 'project',
'compute-scores'),
help=argparse.SUPPRESS) # 'Executes a subtask (FOR INTERNAL USE ONLY!!!)'
parser.add_argument('--group',
help=argparse.SUPPRESS) # 'The group for which the current action should be performed'
# now that we have set up everything, get the command line arguments
return tools.initialize(parsers, command_line_parameters,
skips=['preprocessing', 'extraction', 'projector-training', 'projection',
'score-computation'])
skips=['preprocessing', 'extractor-training', 'extraction', 'projector-training',
'projection', 'score-computation'])
def add_jobs(args, submitter):
......@@ -65,6 +66,18 @@ def add_jobs(args, submitter):
**args.grid.preprocessing_queue)
deps.append(job_ids['preprocessing'])
# feature extraction training
if not args.skip_extractor_training and args.extractor.requires_training:
if args.grid is None:
jobs_to_execute.append(('train-extractor',))
else:
job_ids['extractor-training'] = submitter.submit(
'--sub-task train-extractor',
name='train-f',
dependencies=deps,
**args.grid.training_queue)
deps.append(job_ids['extractor-training'])
# feature extraction
if not args.skip_extraction:
if args.grid is None:
......@@ -151,6 +164,15 @@ def execute(args):
groups=tools.groups(args),
indices=biotools.indices(fs.original_data_list(groups=tools.groups(args)),
None if args.grid is None else args.grid.number_of_preprocessing_jobs),
allow_missing_files=args.allow_missing_files,
force=args.force)
# train the feature extractor
elif args.sub_task == 'train-extractor':
tools.train_extractor(
args.extractor,
args.preprocessor,
allow_missing_files=args.allow_missing_files,
force=args.force)
# extract the features
......@@ -161,6 +183,7 @@ def execute(args):
groups=tools.groups(args),
indices=biotools.indices(fs.preprocessed_data_list(groups=tools.groups(args)),
None if args.grid is None else args.grid.number_of_extraction_jobs),
allow_missing_files=args.allow_missing_files,
force=args.force)
# train the feature projector
......@@ -168,6 +191,7 @@ def execute(args):
tools.train_projector(
args.algorithm,
args.extractor,
allow_missing_files=args.allow_missing_files,
force=args.force)
# project the features
......@@ -178,6 +202,7 @@ def execute(args):
groups=tools.groups(args),
indices=biotools.indices(fs.preprocessed_data_list(groups=tools.groups(args)),
None if args.grid is None else args.grid.number_of_projection_jobs),
allow_missing_files=args.allow_missing_files,
force=args.force)
# compute scores
......@@ -185,6 +210,7 @@ def execute(args):
tools.compute_scores(
args.algorithm,
groups=[args.group],
allow_missing_files=args.allow_missing_files,
force=args.force,
write_compressed=args.write_compressed_score_files)
......
......@@ -25,6 +25,9 @@ class FileSelector(object):
preprocessed_directory : str
The directory, where preprocessed data should be written to.
extractor_file : str
The filename, where the extractor should be written to (if any).
extracted_directory : str
The directory, where extracted features should be written to.
......@@ -51,6 +54,7 @@ class FileSelector(object):
self,
database,
preprocessed_directory,
extractor_file,
extracted_directory,
projector_file,
projected_directory,
......@@ -61,6 +65,7 @@ class FileSelector(object):
"""Initialize the file selector object with the current configuration."""
self.database = database
self.extractor_file = extractor_file
self.projector_file = projector_file
self.score_directories = score_directories
......
......@@ -18,7 +18,7 @@ from .extractor import read_features
from bob.bio.base import utils
def train_projector(algorithm, extractor, force=False):
def train_projector(algorithm, extractor, allow_missing_files=False, force=False):
"""Trains the feature projector using extracted features of the ``'train'`` group, if the algorithm requires projector training.
This function should only be called, when the ``algorithm`` actually requires projector training.
......@@ -60,7 +60,7 @@ def train_projector(algorithm, extractor, force=False):
algorithm.train_projector(train_features, fs.projector_file)
def project(algorithm, extractor, groups=None, indices=None, force=False):
def project(algorithm, extractor, groups=None, indices=None, allow_missing_files=False, force=False):
"""Projects the features for all files of the database.
The given ``algorithm`` is used to project all features required for the current experiment.
......
This diff is collapsed.
......@@ -15,9 +15,55 @@ logger = logging.getLogger("bob.pad.base")
from .FileSelector import FileSelector
from bob.bio.base import utils
from .preprocessor import read_preprocessed_data
def extract(extractor, preprocessor, groups=None, indices=None, force=False):
def train_extractor(extractor, preprocessor, allow_missing_files=False, force=False):
    """Trains the feature extractor using preprocessed data, if the extractor requires training.

    This function should only be called when the ``extractor`` actually requires training;
    otherwise a warning is logged and nothing else is done.
    The training file list is obtained from the :py:class:`bob.pad.base.tools.FileSelector`,
    the corresponding preprocessed data is read with the given ``preprocessor``,
    and ``extractor.train`` is called with that data and the extractor file name
    specified by the :py:class:`bob.pad.base.tools.FileSelector`.
    By default, if the target file already exists, it is not re-created.

    **Parameters:**

    extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
      The extractor to be trained.

    preprocessor : :py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
      The preprocessor, used for reading the preprocessed data.

    allow_missing_files : bool
      Accepted for interface consistency with the other tool functions.
      NOTE(review): this function does not currently forward the flag to the
      data-reading step, so it has no effect here -- confirm whether
      ``read_preprocessed_data`` should receive it.

    force : bool
      If given, the extractor file is regenerated, even if it already exists.
    """
    if not extractor.requires_training:
        # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
        logger.warning(
            "The train_extractor function should not have been called, since the extractor does not need training.")
        return

    # the file selector object
    fs = FileSelector.instance()

    # nothing to do when an acceptable extractor file is already there
    if utils.check_file(fs.extractor_file, force,
                        extractor.min_extractor_file_size):
        logger.info("- Extraction: extractor '%s' already exists.", fs.extractor_file)
        return

    # make sure that the directory of the extractor file exists
    bob.io.base.create_directories_safe(os.path.dirname(fs.extractor_file))

    # read training files
    train_files = fs.training_list('preprocessed', 'train_extractor')
    train_data = read_preprocessed_data(train_files, preprocessor)

    logger.info("- Extraction: training extractor '%s' using %d training files:", fs.extractor_file,
                len(train_files))

    # train the extractor; it is handed the file name of the extractor file
    extractor.train(train_data, fs.extractor_file)
def extract(extractor, preprocessor, groups=None, indices=None, allow_missing_files=False, force=False):
"""Extracts features from the preprocessed data using the given extractor.
The given ``extractor`` is used to extract all features required for the current experiment.
......@@ -46,6 +92,7 @@ def extract(extractor, preprocessor, groups=None, indices=None, force=False):
"""
# the file selector object
fs = FileSelector.instance()
extractor.load(fs.extractor_file)
data_files = fs.preprocessed_data_list(groups=groups)
feature_files = fs.feature_list(groups=groups)
......
......@@ -17,7 +17,7 @@ from .FileSelector import FileSelector
from bob.bio.base import utils
def preprocess(preprocessor, groups=None, indices=None, force=False):
def preprocess(preprocessor, groups=None, indices=None, allow_missing_files=False, force=False):
"""Preprocesses the original data of the database with the given preprocessor.
The given ``preprocessor`` is used to preprocess all data required for the current experiment.
......
......@@ -147,7 +147,7 @@ def _scores_all(algorithm, group, force, write_compressed=False):
current_toscore_objects[0]+current_toscore_objects[1], write_compressed)
def compute_scores(algorithm, force=False, groups=['dev', 'eval'], write_compressed=False):
def compute_scores(algorithm, force=False, groups=['dev', 'eval'], allow_missing_files=False, write_compressed=False):
"""Computes the scores for the given groups.
This function computes all scores for the experiment and writes them to score files.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment