From f994c1af7ea102e22d0542f371bdbd32c8d21d83 Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Mon, 17 Feb 2020 14:43:51 +0100 Subject: [PATCH] [dask] Preparing bob.bio.base for dask pipelines [ci] Accepting private packages TEMPORARILY [conda] Removed entry-points [ci] Reverting commit --- bob/bio/base/__init__.py | 3 - bob/bio/base/baseline/Baseline.py | 75 -- bob/bio/base/baseline/__init__.py | 31 - bob/bio/base/config/grid/__init__.py | 0 bob/bio/base/config/grid/demanding.py | 16 - bob/bio/base/config/grid/gpu.py | 16 - bob/bio/base/config/grid/grid.py | 4 - bob/bio/base/config/grid/local.py | 20 - bob/bio/base/grid.py | 149 ---- bob/bio/base/pipelines/__init__.py | 4 + bob/bio/base/pipelines/annotated_blocks.py | 314 ++++++++ bob/bio/base/pipelines/blocks.py | 591 +++++++++++++++ bob/bio/base/pipelines/vanilla_biometrics.py | 365 ++++++++++ bob/bio/base/script/__init__.py | 2 - bob/bio/base/script/annotate.py | 23 +- bob/bio/base/script/baseline.py | 128 ---- bob/bio/base/script/collect_results.py | 248 ------- bob/bio/base/script/enroll.py | 62 -- bob/bio/base/script/extract.py | 52 -- bob/bio/base/script/fuse_scores.py | 117 --- bob/bio/base/script/grid_search.py | 442 ------------ bob/bio/base/script/preprocess.py | 64 -- bob/bio/base/script/score.py | 66 -- bob/bio/base/script/vanilla_biometrics.py | 226 ++++++ bob/bio/base/script/verify.py | 442 ------------ bob/bio/base/test/test_baselines.py | 21 - bob/bio/base/test/test_commands.py | 6 +- bob/bio/base/test/test_config_file.py | 333 --------- bob/bio/base/test/test_scripts.py | 713 ------------------- bob/bio/base/test/test_tools.py | 4 - bob/bio/base/test/test_utils.py | 18 - bob/bio/base/tools/FileSelector.py | 280 -------- bob/bio/base/tools/__init__.py | 10 - bob/bio/base/tools/algorithm.py | 363 ---------- bob/bio/base/tools/command_line.py | 626 ---------------- bob/bio/base/tools/extractor.py | 179 ----- bob/bio/base/tools/grid.py | 134 ---- bob/bio/base/tools/preprocessor.py | 138 ---- bob/bio/base/tools/scoring.py | 596 ---------------- bob/bio/base/utils/__init__.py | 1 - bob/bio/base/utils/singleton.py | 44 -- conda/meta.yaml | 21 +- requirements.txt | 1 + setup.py | 29 +- 44 files changed, 1535 insertions(+), 5442 deletions(-) delete mode 100644 bob/bio/base/baseline/Baseline.py delete mode 100755 bob/bio/base/baseline/__init__.py delete mode 100644 bob/bio/base/config/grid/__init__.py delete mode 100644 bob/bio/base/config/grid/demanding.py delete mode 100644 bob/bio/base/config/grid/gpu.py delete mode 100644 bob/bio/base/config/grid/grid.py delete mode 100644 bob/bio/base/config/grid/local.py delete mode 100644 bob/bio/base/grid.py create mode 100644 bob/bio/base/pipelines/__init__.py create mode 100644 bob/bio/base/pipelines/annotated_blocks.py create mode 100644 bob/bio/base/pipelines/blocks.py create mode 100644 bob/bio/base/pipelines/vanilla_biometrics.py delete mode 100644 bob/bio/base/script/baseline.py delete mode 100644 bob/bio/base/script/collect_results.py delete mode 100644 bob/bio/base/script/enroll.py delete mode 100644 bob/bio/base/script/extract.py delete mode 100755 bob/bio/base/script/fuse_scores.py delete mode 100755 bob/bio/base/script/grid_search.py delete mode 100644 bob/bio/base/script/preprocess.py delete mode 100644 bob/bio/base/script/score.py create mode 100644 bob/bio/base/script/vanilla_biometrics.py delete mode 100644 bob/bio/base/script/verify.py delete mode 100644 bob/bio/base/test/test_baselines.py delete mode 100644 bob/bio/base/test/test_config_file.py delete mode 100644 bob/bio/base/test/test_scripts.py delete mode 100644 bob/bio/base/test/test_tools.py delete mode 100644 bob/bio/base/tools/FileSelector.py delete mode 100644 bob/bio/base/tools/__init__.py delete mode 100644 bob/bio/base/tools/algorithm.py delete mode 100644 bob/bio/base/tools/command_line.py delete mode 100644 bob/bio/base/tools/extractor.py delete mode 100644 bob/bio/base/tools/grid.py delete mode 100644 bob/bio/base/tools/preprocessor.py delete mode 100644 bob/bio/base/tools/scoring.py delete mode 100644 bob/bio/base/utils/singleton.py diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py index c69d5db3..82ce2a09 100644 --- a/bob/bio/base/__init__.py +++ b/bob/bio/base/__init__.py @@ -3,10 +3,7 @@ from . import database from . import preprocessor from . import extractor from . import algorithm -from . import tools -from . import grid # only one file, not complete directory from . import annotator -from . import baseline from . import script from . import test diff --git a/bob/bio/base/baseline/Baseline.py b/bob/bio/base/baseline/Baseline.py deleted file mode 100644 index 36912797..00000000 --- a/bob/bio/base/baseline/Baseline.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -from .. import resource_keys, load_resource - - -def search_preprocessor(db_name, keys): - """ - Wrapper that searches for preprocessors for specific databases. - If not found, the default preprocessor is returned - """ - for k in keys: - if db_name.startswith(k): - return k - else: - return "default" - - -def get_available_databases(): - """ - Get all the available databases through the database entry-points - """ - - available_databases = dict() - all_databases = resource_keys('database', strip=[]) - for database in all_databases: - try: - database_entry_point = load_resource(database, 'database') - - available_databases[database] = dict() - - # Checking if the database has data for the ZT normalization - available_databases[database]["has_zt"] = hasattr(database_entry_point, "zobjects") and hasattr(database_entry_point, "tobjects") - available_databases[database]["groups"] = [] - # Searching for database groups - try: - groups = list(database_entry_point.groups()) or ["dev"] - for g in ["dev", "eval"]: - available_databases[database]["groups"] += [g] if g in groups else [] - except Exception: - # In case the method groups is not implemented - available_databases[database]["groups"] = ["dev"] - except Exception: - pass - return available_databases - - -class Baseline(object): - """ - Base class to define baselines - - A Baseline is composed by the triplet - :any:`bob.bio.base.preprocessor.Preprocessor`, - :any:`bob.bio.base.extractor.Extractor`, and - :any:`bob.bio.base.algorithm.Algorithm` - - Attributes - ---------- - name : str - Name of the baseline. This name will be displayed in the command line - interface. - preprocessors : dict - Dictionary containing all possible preprocessors - extractor : str - Registered resource or a config file containing the feature extractor - algorithm : str - Registered resource or a config file containing the algorithm - """ - - def __init__(self, name, preprocessors, extractor, algorithm, **kwargs): - super(Baseline, self).__init__(**kwargs) - self.name = name - self.preprocessors = preprocessors - self.extractor = extractor - self.algorithm = algorithm diff --git a/bob/bio/base/baseline/__init__.py b/bob/bio/base/baseline/__init__.py deleted file mode 100755 index 8e510bd3..00000000 --- a/bob/bio/base/baseline/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -from .Baseline import Baseline, search_preprocessor, get_available_databases - - -def get_config(): - """Returns a string containing the configuration information. - """ - import bob.extension - return bob.extension.get_config(__name__) - - -# gets sphinx autodoc done right - don't remove it -def __appropriate__(*args): - """Says object was actually declared here, and not in the import module. - Fixing sphinx warnings of not being able to find classes, when path is - shortened. Parameters: - - *args: An iterable of objects to modify - - Resolves `Sphinx referencing issues - <https://github.com/sphinx-doc/sphinx/issues/3048>` - """ - - for obj in args: - obj.__module__ = __name__ - - -__appropriate__( - Baseline, -) - -__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/config/grid/__init__.py b/bob/bio/base/config/grid/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/bob/bio/base/config/grid/demanding.py b/bob/bio/base/config/grid/demanding.py deleted file mode 100644 index d5655ebc..00000000 --- a/bob/bio/base/config/grid/demanding.py +++ /dev/null @@ -1,16 +0,0 @@ -import bob.bio.base - -# define a queue with demanding parameters -grid = bob.bio.base.grid.Grid( - training_queue = '32G', - # preprocessing - preprocessing_queue = '4G-io-big', - # feature extraction - extraction_queue = '8G-io-big', - # feature projection - projection_queue = '8G-io-big', - # model enrollment - enrollment_queue = '8G-io-big', - # scoring - scoring_queue = '8G-io-big' -) diff --git a/bob/bio/base/config/grid/gpu.py b/bob/bio/base/config/grid/gpu.py deleted file mode 100644 index ecc6e475..00000000 --- a/bob/bio/base/config/grid/gpu.py +++ /dev/null @@ -1,16 +0,0 @@ -import bob.bio.base - -# define a queue with demanding parameters -grid = bob.bio.base.grid.Grid( - training_queue = 'GPU', - # preprocessing - preprocessing_queue = '4G', - # feature extraction - extraction_queue = 'GPU', - # feature projection - projection_queue = '4G', - # model enrollment - enrollment_queue = '4G', - # scoring - scoring_queue = '4G' -) diff --git a/bob/bio/base/config/grid/grid.py b/bob/bio/base/config/grid/grid.py deleted file mode 100644 index f4c3852d..00000000 --- a/bob/bio/base/config/grid/grid.py +++ /dev/null @@ -1,4 +0,0 @@ -import bob.bio.base - -# define the queue using all the default parameters -grid = bob.bio.base.grid.Grid() diff --git a/bob/bio/base/config/grid/local.py b/bob/bio/base/config/grid/local.py deleted file mode 100644 index baddc809..00000000 --- a/bob/bio/base/config/grid/local.py +++ /dev/null @@ -1,20 +0,0 @@ -import bob.bio.base - -# define the queue using all the default parameters -grid = bob.bio.base.grid.Grid( - grid_type = 'local', - number_of_parallel_processes = 4 -) - - -# define a queue that is highly parallelized -grid_p8 = bob.bio.base.grid.Grid( - grid_type = 'local', - number_of_parallel_processes = 8 -) - -# define a queue that is highly parallelized -grid_p16 = bob.bio.base.grid.Grid( - grid_type = 'local', - number_of_parallel_processes = 16 -) diff --git a/bob/bio/base/grid.py b/bob/bio/base/grid.py deleted file mode 100644 index 791604d0..00000000 --- a/bob/bio/base/grid.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> -# @date: Tue Oct 2 12:12:39 CEST 2012 -import six - -PREDEFINED_QUEUES = { - 'default' : {}, - '2G' : {'queue' : 'all.q', 'memfree' : '2G'}, - '4G' : {'queue' : 'all.q', 'memfree' : '4G'}, - '4G-q1d' : {'queue' : 'q1d', 'memfree' : '4G'}, - '4G-io-big' : {'queue' : 'q1d', 'memfree' : '4G', 'io_big' : True}, - '8G' : {'queue' : 'q1d', 'memfree' : '8G'}, - '8G-io-big' : {'queue' : 'q1d', 'memfree' : '8G', 'io_big' : True}, - '16G' : {'queue' : 'q1dm', 'memfree' : '16G', 'pe_opt' : 'pe_mth 2', 'hvmem' : '8G'}, - '16G-io-big' : {'queue' : 'q1dm', 'memfree' : '16G', 'pe_opt' : 'pe_mth 2', 'hvmem' : '8G', 'io_big' : True}, - '32G' : {'queue' : 'q1dm', 'memfree' : '32G', 'pe_opt' : 'pe_mth 4', 'hvmem' : '8G', 'io_big' : True}, - '64G' : {'queue' : 'q1dm', 'memfree' : '56G', 'pe_opt' : 'pe_mth 8', 'hvmem' : '7G', 'io_big' : True}, - 'Week' : {'queue' : 'q1wm', 'memfree' : '32G', 'pe_opt' : 'pe_mth 4', 'hvmem' : '8G'}, - 'GPU' : {'queue' : 'gpu'} -} - -from . import utils - -class Grid (object): - """This class is defining the options that are required to submit parallel jobs to the SGE grid, or jobs to the local queue. - - If the given ``grid_type`` is ``'sge'`` (the default), this configuration is set up to submit algorithms to the SGE grid. - In this setup, specific SGE queues can be specified for different steps of the tool chain, and different numbers of parallel processes can be specified for each step. - Currently, only the SGE at Idiap_ is tested and supported, for other SGE's we do not assure compatibility. - - If the given ``grid_type`` is ``'local'``, this configuration is set up to run using a local scheduler on a single machine. - In this case, only the ``number_of_parallel_processes`` and ``scheduler_sleep_time`` options will be taken into account. - - **Parameters:** - - grid_type : one of ``('sge', 'local')`` - The type of submission system, which should be used. - Currently, only sge and local submissions are supported. - - number_of_preprocessing_jobs, number_of_extraction_jobs, number_of_projection_jobs, number_of_enrollment_jobs, number_of_scoring_jobs : int - Only valid if ``grid_type = 'sge'``. - The number of parallel processes that should be executed for preprocessing, extraction, projection, enrollment or scoring. - - training_queue, preprocessing_queue, extraction_queue, projection_queue, enrollment_queue, scoring_queue : str or dict - Only valid if ``grid_type = 'sge'``. - SGE queues that should be used for training, preprocessing, extraction, projection, enrollment or scoring. - The queue can be defined using a dictionary of keywords that will directly passed to the :py:func:`gridtk.tools.qsub` function, or one of our :py:data:`PREDEFINED_QUEUES`, which are adapted for Idiap_. - - number_of_parallel_processes : int - Only valid if ``grid_type = 'local'``. - The number of parallel processes, with which the preprocessing, extraction, projection, enrollment and scoring should be executed. - - scheduler_sleep_time : float - The time (in seconds) that the local scheduler will sleep between its iterations. - """ - - def __init__( - self, - # grid type, currently supported 'local' and 'sge' - grid_type = 'sge', - # parameters for the splitting of jobs into array jobs; ignored by the local scheduler - number_of_preprocessing_jobs = 32, - number_of_extraction_jobs = 32, - number_of_projection_jobs = 32, - number_of_enrollment_jobs = 32, - number_of_scoring_jobs = 32, - - # queue setup for the SGE grid (only used if grid = 'sge', the default) - training_queue = '8G', - preprocessing_queue = 'default', - extraction_queue = 'default', - projection_queue = 'default', - enrollment_queue = 'default', - scoring_queue = 'default', - - # setup of the local submission and execution of job (only used if grid = 'local') - number_of_parallel_processes = 1, - scheduler_sleep_time = 1.0 # sleep time for scheduler in seconds - ): - - self.grid_type = grid_type - if self.is_local(): - self._kwargs = dict(grid_type=grid_type, number_of_parallel_processes=number_of_parallel_processes, scheduler_sleep_time=scheduler_sleep_time) - else: - self._kwargs = dict( - grid_type=grid_type, - number_of_preprocessing_jobs=number_of_preprocessing_jobs, number_of_extraction_jobs=number_of_extraction_jobs, number_of_projection_jobs=number_of_projection_jobs, number_of_enrollment_jobs=number_of_enrollment_jobs, - training_queue=training_queue, preprocessing_queue=preprocessing_queue, extraction_queue=extraction_queue, projection_queue=projection_queue, enrollment_queue=enrollment_queue, scoring_queue=scoring_queue - ) - - - # the numbers - if self.is_local(): - self.number_of_preprocessing_jobs = number_of_parallel_processes - self.number_of_extraction_jobs = number_of_parallel_processes - self.number_of_projection_jobs = number_of_parallel_processes - self.number_of_enrollment_jobs = number_of_parallel_processes - self.number_of_scoring_jobs = number_of_parallel_processes - else: - self.number_of_preprocessing_jobs = number_of_preprocessing_jobs - self.number_of_extraction_jobs = number_of_extraction_jobs - self.number_of_projection_jobs = number_of_projection_jobs - self.number_of_enrollment_jobs = number_of_enrollment_jobs - self.number_of_scoring_jobs = number_of_scoring_jobs - - # the queues - self.training_queue = self.queue(training_queue) - self.preprocessing_queue = self.queue(preprocessing_queue) - self.extraction_queue = self.queue(extraction_queue) - self.projection_queue = self.queue(projection_queue) - self.enrollment_queue = self.queue(enrollment_queue) - self.scoring_queue = self.queue(scoring_queue) - # the local setup - self.number_of_parallel_processes = number_of_parallel_processes - self.scheduler_sleep_time = scheduler_sleep_time - - - def __str__(self): - """Converts this grid configuration into a string, which contains the complete set of parameters.""" - return utils.pretty_print(self, self._kwargs) - - - def queue(self, params): - """queue(params) -> dict - - This helper function translates the given queue parameters to grid options. - When the given ``params`` are a dictionary already, they are simply returned. - If ``params`` is a string, the :py:data:`PREDEFINED_QUEUES` are indexed with them. - If ``params`` is ``None``, or the ``grid_type`` is ``'local'``, an empty dictionary is returned. - """ - if self.is_local(): - return {} - if isinstance(params, six.string_types) and params in PREDEFINED_QUEUES: - return PREDEFINED_QUEUES[params] - elif isinstance(params, dict): - return params - elif params is None: - return {} - else: - raise ValueError("The given queue parameters '%s' are not in the predefined queues and neither a dictionary with values." % str(params)) - - - def is_local(self): - """Returns whether this grid setup should use the local submission or the SGE grid.""" - return self.grid_type == 'local' - -# gets sphinx autodoc done right - don't remove it -__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/pipelines/__init__.py b/bob/bio/base/pipelines/__init__.py new file mode 100644 index 00000000..edbb4090 --- /dev/null +++ b/bob/bio/base/pipelines/__init__.py @@ -0,0 +1,4 @@ +# see https://docs.python.org/3/library/pkgutil.html +from pkgutil import extend_path + +__path__ = extend_path(__path__, __name__) diff --git a/bob/bio/base/pipelines/annotated_blocks.py b/bob/bio/base/pipelines/annotated_blocks.py new file mode 100644 index 00000000..bafd959e --- /dev/null +++ b/bob/bio/base/pipelines/annotated_blocks.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + + +"""Re-usable blocks for legacy bob.bio.base algorithms""" + + +import os +import copy +import functools + +import bob.io.base + + +from .blocks import DatabaseConnector, SampleLoader +from bob.pipelines.sample.sample import SampleSet, DelayedSample, Sample + + +class DatabaseConnectorAnnotated(DatabaseConnector): + """Wraps a bob.bio.base database and generates conforming samples for datasets + that has annotations + + This connector allows wrapping generic bob.bio.base datasets and generate + samples that conform to the specifications of biometric pipelines defined + in this package. + + + Parameters + ---------- + + database : object + An instantiated version of a bob.bio.base.Database object + + protocol : str + The name of the protocol to generate samples from. + To be plugged at :py:method:`bob.db.base.Database.objects`. + + """ + + def __init__(self, database, protocol): + super(DatabaseConnectorAnnotated, self).__init__(database, protocol) + + + def background_model_samples(self): + """Returns :py:class:`Sample`'s to train a background model (group + ``world``). + + + Returns + ------- + + samples : list + List of samples conforming the pipeline API for background + model training. See, e.g., :py:func:`.pipelines.first`. + + """ + + # TODO: This should be organized by client + retval = [] + + objects = self.database.objects(protocol=self.protocol, groups="world") + + return [ + SampleSet( + [ + DelayedSample( + load=functools.partial( + k.load, + self.database.original_directory, + self.database.original_extension, + ), + id=k.id, + path=k.path, + annotations=self.database.annotations(k) + ) + ] + ) + for k in objects + ] + + def references(self, group="dev"): + """Returns :py:class:`Reference`'s to enroll biometric references + + + Parameters + ---------- + + group : :py:class:`str`, optional + A ``group`` to be plugged at + :py:meth:`bob.db.base.Database.objects` + + + Returns + ------- + + references : list + List of samples conforming the pipeline API for the creation of + biometric references. See, e.g., :py:func:`.pipelines.first`. + + """ + + retval = [] + + for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): + + objects = self.database.objects( + protocol=self.protocol, + groups=group, + model_ids=(m,), + purposes="enroll", + ) + + retval.append( + SampleSet( + [ + DelayedSample( + load=functools.partial( + k.load, + self.database.original_directory, + self.database.original_extension, + ), + id=k.id, + path=k.path, + annotations=self.database.annotations(k) + ) + for k in objects + ], + id=m, + path=str(m), + subject=objects[0].client_id, + ) + ) + + return retval + + def probes(self, group): + """Returns :py:class:`Probe`'s to score biometric references + + + Parameters + ---------- + + group : str + A ``group`` to be plugged at + :py:meth:`bob.db.base.Database.objects` + + + Returns + ------- + + probes : list + List of samples conforming the pipeline API for the creation of + biometric probes. See, e.g., :py:func:`.pipelines.first`. + + """ + + probes = dict() + + for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): + + # Getting all the probe objects from a particular biometric + # reference + objects = self.database.objects( + protocol=self.protocol, + groups=group, + model_ids=(m,), + purposes="probe", + ) + + # Creating probe samples + for o in objects: + if o.id not in probes: + probes[o.id] = SampleSet( + [ + DelayedSample( + load=functools.partial( + o.load, + self.database.original_directory, + self.database.original_extension, + ), + id=o.id, + path=o.path, + annotations=self.database.annotations(o) + ) + ], + id=o.id, + path=o.path, + subject=o.client_id, + references=[m], + ) + else: + probes[o.id].references.append(m) + + return list(probes.values()) + + +class SampleLoaderAnnotated(SampleLoader): + """Adaptor for loading, preprocessing and feature extracting samples that uses annotations + + This adaptor class wraps around sample: + + .. code-block:: text + + [loading [-> preprocessing [-> extraction]]] + + The input sample object must obbey the following (minimal) API: + + * attribute ``id``: Contains an unique (string-fiable) identifier for + processed samples + * attribute ``data``: Contains the data for this sample + + Optional checkpointing is also implemented for each of the states, + independently. You may check-point just the preprocessing, feature + extraction or both. + + + Parameters + ---------- + + pipeline : :py:class:`list` of (:py:class:`str`, callable) + A list of doubles in which the first entry are names of each processing + step in the pipeline and second entry must be default-constructible + :py:class:`bob.bio.base.preprocessor.Preprocessor` or + :py:class:`bob.bio.base.preprocessor.Extractor` in any order. Each + of these objects must be a python type, that can be instantiated and + used through its ``__call__()`` interface to process a single entry of + a sample. For python types that you may want to plug-in, but do not + offer a default constructor that you like, pass the result of + :py:func:`functools.partial` instead. + + """ + + def __init__(self, pipeline): + super(SampleLoaderAnnotated, self).__init__(pipeline) + + + def _handle_step(self, sset, func, checkpoint): + """Handles a single step in the pipeline, with optional checkpointing + + Parameters + ---------- + + sset : SampleSet + The original sample set to be processed (delayed or pre-loaded) + + func : callable + The processing function to call for processing **each** sample in + the set, if needs be + + checkpoint : str, None + An optional string that may point to a directory that will be used + for checkpointing the processing phase in question + + + Returns + ------- + + r : SampleSet + The prototype processed sample. If no checkpointing required, this + will be of type :py:class:`Sample`. Otherwise, it will be a + :py:class:`DelayedSample` + + """ + + if checkpoint is not None: + samples = [] # processed samples + for s in sset.samples: + # there can be a checkpoint for the data to be processed + candidate = os.path.join(checkpoint, s.path + ".hdf5") + if not os.path.exists(candidate): + + # TODO: Fix this on bob.bio.base + try: + # preprocessing is required, and checkpointing, do it now + data = func(s.data, annotations=s.annotations) + except: + data = func(s.data) + + + # notice this can be called in parallel w/o failing + bob.io.base.create_directories_safe( + os.path.dirname(candidate) + ) + # bob.bio.base standard interface for preprocessor + # has a read/write_data methods + writer = ( + getattr(func, "write_data") + if hasattr(func, "write_data") + else getattr(func, "write_feature") + ) + writer(data, candidate) + + # because we are checkpointing, we return a DelayedSample + # instead of normal (preloaded) sample. This allows the next + # phase to avoid loading it would it be unnecessary (e.g. next + # phase is already check-pointed) + reader = ( + getattr(func, "read_data") + if hasattr(func, "read_data") + else getattr(func, "read_feature") + ) + samples.append( + DelayedSample( + functools.partial(reader, candidate), parent=s + ) + ) + else: + # if checkpointing is not required, load the data and preprocess it + # as we would normally do + samples = [Sample(func(s.data), parent=s) for s in sset.samples] + + r = SampleSet(samples, parent=sset) + return r + diff --git a/bob/bio/base/pipelines/blocks.py b/bob/bio/base/pipelines/blocks.py new file mode 100644 index 00000000..0b9c88fb --- /dev/null +++ b/bob/bio/base/pipelines/blocks.py @@ -0,0 +1,591 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + + +"""Re-usable blocks for legacy bob.bio.base algorithms""" + + +import os +import copy +import functools + +import bob.io.base + + +class DatabaseConnector: + """Wraps a bob.bio.base database and generates conforming samples + + This connector allows wrapping generic bob.bio.base datasets and generate + samples that conform to the specifications of biometric pipelines defined + in this package. + + + Parameters + ---------- + + database : object + An instantiated version of a bob.bio.base.Database object + + protocol : str + The name of the protocol to generate samples from. + To be plugged at :py:method:`bob.db.base.Database.objects`. + + """ + + def __init__(self, database, protocol): + self.database = database + self.protocol = protocol + self.directory = database.original_directory + self.extension = database.original_extension + + def background_model_samples(self): + """Returns :py:class:`Sample`'s to train a background model (group + ``world``). + + + Returns + ------- + + samples : list + List of samples conforming the pipeline API for background + model training. See, e.g., :py:func:`.pipelines.first`. + + """ + + # TODO: This should be organized by client + retval = [] + + objects = self.database.objects(protocol=self.protocol, groups="world") + + return [ + SampleSet( + [ + DelayedSample( + load=functools.partial( + k.load, + self.database.original_directory, + self.database.original_extension, + ), + id=k.id, + path=k.path, + ) + ] + ) + for k in objects + ] + + def references(self, group="dev"): + """Returns :py:class:`Reference`'s to enroll biometric references + + + Parameters + ---------- + + group : :py:class:`str`, optional + A ``group`` to be plugged at + :py:meth:`bob.db.base.Database.objects` + + + Returns + ------- + + references : list + List of samples conforming the pipeline API for the creation of + biometric references. See, e.g., :py:func:`.pipelines.first`. + + """ + + retval = [] + + for m in self.database.model_ids(protocol=self.protocol, groups=group): + + objects = self.database.objects( + protocol=self.protocol, + groups=group, + model_ids=(m,), + purposes="enroll", + ) + + retval.append( + SampleSet( + [ + DelayedSample( + load=functools.partial( + k.load, + self.database.original_directory, + self.database.original_extension, + ), + id=k.id, + path=k.path, + ) + for k in objects + ], + id=m, + path=str(m), + subject=objects[0].client_id, + ) + ) + + return retval + + def probes(self, group): + """Returns :py:class:`Probe`'s to score biometric references + + + Parameters + ---------- + + group : str + A ``group`` to be plugged at + :py:meth:`bob.db.base.Database.objects` + + + Returns + ------- + + probes : list + List of samples conforming the pipeline API for the creation of + biometric probes. See, e.g., :py:func:`.pipelines.first`. + + """ + + probes = dict() + + for m in self.database.model_ids(protocol=self.protocol, groups=group): + + # Getting all the probe objects from a particular biometric + # reference + objects = self.database.objects( + protocol=self.protocol, + groups=group, + model_ids=(m,), + purposes="probe", + ) + + # Creating probe samples + for o in objects: + if o.id not in probes: + probes[o.id] = SampleSet( + [ + DelayedSample( + load=functools.partial( + o.load, + self.database.original_directory, + self.database.original_extension, + ), + id=o.id, + path=o.path, + ) + ], + id=o.id, + path=o.path, + subject=o.client_id, + references=[m], + ) + else: + probes[o.id].references.append(m) + + return list(probes.values()) + + +class SampleLoader: + """Adaptor for loading, preprocessing and feature extracting samples + + This adaptor class wraps around sample: + + .. code-block:: text + + [loading [-> preprocessing [-> extraction]]] + + The input sample object must obbey the following (minimal) API: + + * attribute ``id``: Contains an unique (string-fiable) identifier for + processed samples + * attribute ``data``: Contains the data for this sample + + Optional checkpointing is also implemented for each of the states, + independently. You may check-point just the preprocessing, feature + extraction or both. + + + Parameters + ---------- + + pipeline : :py:class:`list` of (:py:class:`str`, callable) + A list of doubles in which the first entry are names of each processing + step in the pipeline and second entry must be default-constructible + :py:class:`bob.bio.base.preprocessor.Preprocessor` or + :py:class:`bob.bio.base.preprocessor.Extractor` in any order. Each + of these objects must be a python type, that can be instantiated and + used through its ``__call__()`` interface to process a single entry of + a sample. For python types that you may want to plug-in, but do not + offer a default constructor that you like, pass the result of + :py:func:`functools.partial` instead. + + """ + + def __init__(self, pipeline): + self.pipeline = copy.deepcopy(pipeline) + + def _handle_step(self, sset, func, checkpoint): + """Handles a single step in the pipeline, with optional checkpointing + + Parameters + ---------- + + sset : SampleSet + The original sample set to be processed (delayed or pre-loaded) + + func : callable + The processing function to call for processing **each** sample in + the set, if needs be + + checkpoint : str, None + An optional string that may point to a directory that will be used + for checkpointing the processing phase in question + + + Returns + ------- + + r : SampleSet + The prototype processed sample. If no checkpointing required, this + will be of type :py:class:`Sample`. Otherwise, it will be a + :py:class:`DelayedSample` + + """ + + if checkpoint is not None: + samples = [] # processed samples + for s in sset.samples: + # there can be a checkpoint for the data to be processed + candidate = os.path.join(checkpoint, s.path + ".hdf5") + if not os.path.exists(candidate): + # preprocessing is required, and checkpointing, do it now + data = func(s.data) + + # notice this can be called in parallel w/o failing + bob.io.base.create_directories_safe( + os.path.dirname(candidate) + ) + # bob.bio.base standard interface for preprocessor + # has a read/write_data methods + writer = ( + getattr(func, "write_data") + if hasattr(func, "write_data") + else getattr(func, "write_feature") + ) + writer(data, candidate) + + # because we are checkpointing, we return a DelayedSample + # instead of normal (preloaded) sample. This allows the next + # phase to avoid loading it would it be unnecessary (e.g. next + # phase is already check-pointed) + reader = ( + getattr(func, "read_data") + if hasattr(func, "read_data") + else getattr(func, "read_feature") + ) + samples.append( + DelayedSample( + functools.partial(reader, candidate), parent=s + ) + ) + else: + # if checkpointing is not required, load the data and preprocess it + # as we would normally do + samples = [Sample(func(s.data), parent=s) for s in sset.samples] + + r = SampleSet(samples, parent=sset) + return r + + def _handle_sample(self, sset, pipeline): + """Handles a single sampleset through a pipelien + + Parameters + ---------- + + sset : SampleSet + The original sample set to be processed (delayed or pre-loaded) + + pipeline : :py:class:`list` of :py:class:`tuple` + A list of tuples, each comprising of one processing function and + one checkpoint directory (:py:class:`str` or ``None``, to avoid + checkpointing that phase), respectively + + + Returns + ------- + + r : Sample + The processed sample + + """ + + r = sset + for func, checkpoint in pipeline: + r = r if func is None else self._handle_step(r, func, checkpoint) + return r + + def __call__(self, samples, checkpoints): + """Applies the pipeline chaining with optional checkpointing + + Our implementation is optimized to minimize disk I/O to the most. It + yields :py:class:`DelayedSample`'s instead of :py:class:`Sample` if + checkpointing is enabled. + + + Parameters + ---------- + + samples : list + List of :py:class:`SampleSet` to be treated by this pipeline + + checkpoints : dict + A dictionary (with any number of entries) that may contain as many + keys as those defined when you constructed this class with the + pipeline tuple list. Upon execution, the existance of an entry + that defines checkpointing, this phase of the pipeline will be + checkpointed. Notice that you are in the control of checkpointing. + If you miss an intermediary step, it will trigger this loader to + load the relevant sample, even if the next phase is supposed to be + checkpointed. This strategy keeps the implementation as simple as + possible. + + + Returns + ------- + + samplesets : list + Loaded samplesets, after optional preprocessing and extraction + + """ + + pipe = [(v(), checkpoints.get(k)) for k, v in self.pipeline] + return [self._handle_sample(k, pipe) for k in samples] + + +class AlgorithmAdaptor: + """Describes a biometric model based on :py:class:`bob.bio.base.algorithm.Algorithm`'s + + The model can be fitted (optionally). Otherwise, it can only execute + biometric model enrollement, via ``enroll()`` and scoring, with + ``score()``. + + Parameters + ---------- + + algorithm : object + An object that can be initialized by default and posseses the + following attributes and methods: + + * attribute ``requires_projector_training``: indicating if the + model is fittable or not + * method ``train_projector(samples, path)``: receives a list of + objects produced by the equivalent ``Sample.data`` object, fed + **after** sample loading by the equivalent pipeline, and records + the model to an on-disk file + * method ``load_projector(path)``: loads the model state from a file + * method ``project(sample)``: projects the data to an embedding + from a single sample + * method ``enroll(samples)``: creates a scorable biometric + reference from a set of input samples + * method ``score(model, probe)``: scores a single probe, given the + input model, which can be obtained by a simple + ``project(sample)`` + + If the algorithm cannot be initialized by default, pass the result + of :py:func:`functools.partial` instead. + + path : string + A path leading to a place where to save the fitted model or, in + case this model is not fittable (``not is_fitable == False``), then + name of the model to load for running prediction and scoring. + + """ + + def __init__(self, algorithm): + self.algorithm = algorithm + self.extension = ".hdf5" + + def fit(self, samplesets, checkpoint): + """Fits this model, if it is fittable + + Parameters + ---------- + + samplesets : list + A list of :py:class:`SampleSet`s to be used for fitting this + model + + checkpoint : str + If provided, must the path leading to a location where this + model should be saved at (complete path without extension) - + currently, it needs to be provided because of existing + serialization requirements (see bob/bob.io.base#106), but + checkpointing will still work as expected. + + + Returns + ------- + + model : str + A path leading to the fitted model + + """ + + self.path = checkpoint + self.extension + if not os.path.exists(self.path): # needs training + model = self.algorithm() + bob.io.base.create_directories_safe(os.path.dirname(self.path)) + if model.requires_projector_training: + alldata = [ + sample.data + for sampleset in samplesets + for sample in sampleset.samples + ] + model.train_projector(alldata, self.path) + + return self.path + + def enroll(self, references, path, checkpoint, *args, **kwargs): + """Runs prediction on multiple input samples + + This method is optimized to deal with multiple reference biometric + samples at once, organized in partitions + + + Parameters + ---------- + + references : list + A list of :py:class:`SampleSet` objects to be used for + creating biometric references. The sets must be identified + with a unique id and a path, for eventual checkpointing. + + path : str + Path pointing to stored model on disk + + checkpoint : str, None + If passed and not ``None``, then it is considered to be the + path of a directory containing possible cached values for each + of the references in this experiment. If that is the case, the + values are loaded from there and not recomputed. + + *args, **kwargs : + Extra parameters that can be used to hook-up processing graph + dependencies, but are currently ignored + + Returns + ------- + + references : list + A list of :py:class:`.samples.Reference` objects that can be + used in scoring + + """ + + class _CachedModel: + def __init__(self, algorithm, path): + self.model = algorithm() + self.loaded = False + self.path = path + + def load(self): + if not self.loaded: + self.model.load_projector(self.path) + self.loaded = True + + def enroll(self, k): + self.load() + if self.model.requires_projector_training: + return self.model.enroll( + [self.model.project(s.data) for s in k.samples] + ) + else: + return [s.data for s in k.samples] + + def write_enrolled(self, k, path): + self.model.write_model(k, path) + + model = _CachedModel(self.algorithm, path) + + retval = [] + for k in references: + if checkpoint is not None: + candidate = os.path.join( + os.path.join(checkpoint, k.path + ".hdf5") + ) + if not os.path.exists(candidate): + # create new checkpoint + bob.io.base.create_directories_safe( + os.path.dirname(candidate) + ) + enrolled = model.enroll(k) + model.model.write_model(enrolled, candidate) + retval.append( + DelayedSample( + functools.partial(model.model.read_model, candidate), + parent=k, + ) + ) + else: + # compute on-the-fly + retval.append(Sample(model.enroll(k), parent=k)) + return retval + + def score(self, probes, references, path, *args, **kwargs): + """Scores a new sample against multiple (potential) references + + Parameters + ---------- + + probes : list + A list of :py:class:`SampleSet` objects to be used for + scoring the input references + + references : list + A list of :py:class:`Sample` objects to be used for + scoring the input probes, must have an ``id`` attribute that + will be used to cross-reference which probes need to be scored. + + path : str + Path pointing to stored model on disk + + *args, **kwargs : + Extra parameters that can be used to hook-up processing graph + dependencies, but are currently ignored + + + Returns + ------- + + scores : list + For each sample in a probe, returns as many scores as there are + samples in the probe, together with the probe's and the + relevant reference's subject identifiers. + + """ + + model = self.algorithm() + model.load_projector(path) + + retval = [] + for p in probes: + if model.requires_projector_training: + data = [model.project(s.data) for s in p.samples] + else: + data = [s.data for s in p.samples] + + for subprobe_id, (s, parent) in enumerate(zip(data, p.samples)): + # each sub-probe in the probe needs to be checked + subprobe_scores = [] + for ref in [r for r in references if r.id in p.references]: + subprobe_scores.append( + Sample(model.score(ref.data, s), parent=ref) + ) + subprobe = SampleSet(subprobe_scores, parent=p) + subprobe.subprobe_id = subprobe_id + retval.append(subprobe) + return retval + diff --git a/bob/bio/base/pipelines/vanilla_biometrics.py b/bob/bio/base/pipelines/vanilla_biometrics.py new file mode 100644 index 00000000..5ef4d815 --- /dev/null +++ b/bob/bio/base/pipelines/vanilla_biometrics.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +""" +Biometric "blocks" + +This file contains simple processing blocks meant to be used +for bob.bio experiments +""" + +import dask.bag +import dask.delayed + + +def biometric_pipeline( + background_model_samples, + references, + probes, + loader, + algorithm, + npartitions, + checkpoints, +): + """Creates a simple pipeline for **biometric** experiments. + + This contains the following steps: + + 1. Train background model (without labels) + 2. Create biometric references (requires identity) + 3. Scoring (requires probe/reference matching and probe identity) + + + Parameters + ---------- + + background_model_samples : list + List of samples to be used for training an background model. Elements + provided must conform to the :py:class:`.samples.Sample` API, or be a + delayed version of such. + + references : list + List of references to be created in this biometric pipeline. Elements + provided must conform to the :py:class:`.samples.Reference` API, or be + a delayed version of such. + + probes : list + List of probes to be scored in this biometric pipeline. Elements + provided must conform to the :py:class:`.samples.Probe` API, or be + a delayed version of such. + + loader : object + An object that conforms to the :py:class:`.blocks.SampleLoader` API and + can load samples + + algorithm : object + An object that conforms to the :py:class:`.blocks.AlgorithmAdaptor` API + + npartitions : :py:class:`int`, optional + Number of partitions to use when processing this pipeline. Notice that + the number of partitions dictate how many preprocessor/feature + extraction/algorithms objects will be effectively initialized (that is, + will have their constructor called). Internally, we use + :py:class:`dask.bag`'s and :py:meth:`dask.bag.map_partitions` to + process one full partition in a single pass. + + checkpoints : :py:class:`dict` + A dictionary that maps processing phase names to paths that will be + used to create checkpoints of the different processing phases in this + pipeline. Checkpointing may speed up your processing. Existing files + that have been previously check-pointed will not be recomputed. + + Here is an example with all supported options for this pipeline: + + .. code-block:: python + + checkpoints = { + "background": { + "preprocessor": os.path.join("background", "preprocessed"), + "extractor": os.path.join("background", "extracted"), + # at least, the next stage must be provided! + "model": os.path.join("background", "model"), + }, + "references": { + "preprocessor": os.path.join("references", "preprocessed"), + "extractor": os.path.join("references", "extracted"), + "enrolled": os.path.join("references", "enrolled"), + }, + "probes": { + "preprocessor": os.path.join("probes", "preprocessed"), + "extractor": os.path.join("probes", "extracted"), + }, + } + + + + Returns + ------- + + scores: list + A delayed list of scores, that can be obtained by computing the graph + + """ + + ## Training background model (fit will return even if samples is ``None``, + ## in which case we suppose the algorithm is not trainable in any way) + background_model = train_background_model(background_model_samples, loader, algorithm, npartitions, checkpoints) + + ## Create biometric samples + biometric_references = create_biometric_reference(background_model,references,loader,algorithm,npartitions,checkpoints) + + ## Scores all probes + return compute_scores(background_model, biometric_references, probes, loader, algorithm, npartitions, checkpoints) + + +def train_background_model( + background_model_samples, + loader, + algorithm, + npartitions, + checkpoints, +): + """ + Train background model (without labels) + + Parameters + ---------- + + background_model_samples : list + List of samples to be used for training an background model. Elements + provided must conform to the :py:class:`.samples.Sample` API, or be a + delayed version of such. + + loader : object + An object that conforms to the :py:class:`.blocks.SampleLoader` API and + can load samples + + algorithm : object + An object that conforms to the :py:class:`.blocks.AlgorithmAdaptor` API + + npartitions : :py:class:`int`, optional + Number of partitions to use when processing this pipeline. Notice that + the number of partitions dictate how many preprocessor/feature + extraction/algorithms objects will be effectively initialized (that is, + will have their constructor called). Internally, we use + :py:class:`dask.bag`'s and :py:meth:`dask.bag.map_partitions` to + process one full partition in a single pass. + + checkpoints : :py:class:`dict` + A dictionary that maps processing phase names to paths that will be + used to create checkpoints of the different processing phases in this + pipeline. Checkpointing may speed up your processing. Existing files + that have been previously check-pointed will not be recomputed. + + Here is an example with all supported options for this pipeline: + + .. code-block:: python + + checkpoints = { + "background": { + "preprocessor": os.path.join("background", "preprocessed"), + "extractor": os.path.join("background", "extracted"), + # at least, the next stage must be provided! + "model": os.path.join("background", "model"), + }, + "references": { + "preprocessor": os.path.join("references", "preprocessed"), + "extractor": os.path.join("references", "extracted"), + "enrolled": os.path.join("references", "enrolled"), + }, + "probes": { + "preprocessor": os.path.join("probes", "preprocessed"), + "extractor": os.path.join("probes", "extracted"), + }, + } + + """ + ## Training background model (fit will return even if samples is ``None``, + ## in which case we suppose the algorithm is not trainable in any way) + db = dask.bag.from_sequence( + background_model_samples, npartitions=npartitions + ) + db = db.map_partitions(loader, checkpoints.get("background", {})) + background_model = dask.delayed(algorithm.fit)( + db, checkpoints["background"]["model"] + ) + + return background_model + + +def create_biometric_reference( + background_model, + references, + loader, + algorithm, + npartitions, + checkpoints, +): + """ + Create biometric references + + Parameters + ---------- + + background_model: dask.delayed + Trained background model (trained with :py:meth:`train_background_model`) + + references : list + List of references to be created in this biometric pipeline. Elements + provided must conform to the :py:class:`.samples.Reference` API, or be + a delayed version of such. + + loader : object + An object that conforms to the :py:class:`.blocks.SampleLoader` API and + can load samples + + algorithm : object + An object that conforms to the :py:class:`.blocks.AlgorithmAdaptor` API + + npartitions : :py:class:`int`, optional + Number of partitions to use when processing this pipeline. Notice that + the number of partitions dictate how many preprocessor/feature + extraction/algorithms objects will be effectively initialized (that is, + will have their constructor called). Internally, we use + :py:class:`dask.bag`'s and :py:meth:`dask.bag.map_partitions` to + process one full partition in a single pass. + + checkpoints : :py:class:`dict` + A dictionary that maps processing phase names to paths that will be + used to create checkpoints of the different processing phases in this + pipeline. Checkpointing may speed up your processing. Existing files + that have been previously check-pointed will not be recomputed. + + Here is an example with all supported options for this pipeline: + + .. code-block:: python + + checkpoints = { + "background": { + "preprocessor": os.path.join("background", "preprocessed"), + "extractor": os.path.join("background", "extracted"), + # at least, the next stage must be provided! + "model": os.path.join("background", "model"), + }, + "references": { + "preprocessor": os.path.join("references", "preprocessed"), + "extractor": os.path.join("references", "extracted"), + "enrolled": os.path.join("references", "enrolled"), + }, + "probes": { + "preprocessor": os.path.join("probes", "preprocessed"), + "extractor": os.path.join("probes", "extracted"), + }, + } + + Returns + ------- + + Return trained models + + + """ + + ## Enroll biometric references + db = dask.bag.from_sequence(references, npartitions=npartitions) + db = db.map_partitions(loader, checkpoints.get("references", {})) + references = db.map_partitions( + algorithm.enroll, + background_model, + checkpoints.get("references", {}).get("enrolled"), + ) + + return references + + +def compute_scores( + background_model, + references, + probes, + loader, + algorithm, + npartitions, + checkpoints, +): + """ Compute biometric scores + + Parameters + ---------- + + background_model: dask.delayed + Trained background model (trained with :py:meth:`train_background_model`) + + references: dask.delayed + Trained biometric references + + probes : list + List of probes to be scored in this biometric pipeline. Elements + provided must conform to the :py:class:`.samples.Probe` API, or be + a delayed version of such. + + loader : object + An object that conforms to the :py:class:`.blocks.SampleLoader` API and + can load samples + + algorithm : object + An object that conforms to the :py:class:`.blocks.AlgorithmAdaptor` API + + npartitions : :py:class:`int`, optional + Number of partitions to use when processing this pipeline. Notice that + the number of partitions dictate how many preprocessor/feature + extraction/algorithms objects will be effectively initialized (that is, + will have their constructor called). Internally, we use + :py:class:`dask.bag`'s and :py:meth:`dask.bag.map_partitions` to + process one full partition in a single pass. + + checkpoints : :py:class:`dict` + A dictionary that maps processing phase names to paths that will be + used to create checkpoints of the different processing phases in this + pipeline. Checkpointing may speed up your processing. Existing files + that have been previously check-pointed will not be recomputed. + + Here is an example with all supported options for this pipeline: + + .. code-block:: python + + checkpoints = { + "background": { + "preprocessor": os.path.join("background", "preprocessed"), + "extractor": os.path.join("background", "extracted"), + # at least, the next stage must be provided! + "model": os.path.join("background", "model"), + }, + "references": { + "preprocessor": os.path.join("references", "preprocessed"), + "extractor": os.path.join("references", "extracted"), + "enrolled": os.path.join("references", "enrolled"), + }, + "probes": { + "preprocessor": os.path.join("probes", "preprocessed"), + "extractor": os.path.join("probes", "extracted"), + }, + } + + + + Returns + ------- + + scores: list + A delayed list of scores, that can be obtained by computing the graph + + """ + + ## Scores all probes + db = dask.bag.from_sequence(probes, npartitions=npartitions) + db = db.map_partitions(loader, checkpoints.get("probes", {})) + + ## TODO: Here, we are sending all computed biometric references to all + ## probes. It would be more efficient if only the models related to each + ## probe are sent to the probing split. An option would be to use caching + ## and allow the ``score`` function above to load the required data from + ## the disk, directly. A second option would be to generate named delays + ## for each model and then associate them here. + all_references = dask.delayed(list)(references) + return db.map_partitions(algorithm.score, all_references, background_model) + diff --git a/bob/bio/base/script/__init__.py b/bob/bio/base/script/__init__.py index 58dc7690..e69de29b 100644 --- a/bob/bio/base/script/__init__.py +++ b/bob/bio/base/script/__init__.py @@ -1,2 +0,0 @@ -from . import verify -from . import grid_search diff --git a/bob/bio/base/script/annotate.py b/bob/bio/base/script/annotate.py index f88b6645..f19ca352 100644 --- a/bob/bio/base/script/annotate.py +++ b/bob/bio/base/script/annotate.py @@ -12,11 +12,32 @@ from bob.extension.scripts.click_helper import ( log_parameters, ) from bob.io.base import create_directories_safe -from bob.bio.base.tools.grid import indices logger = logging.getLogger(__name__) +def indices(list_to_split, number_of_parallel_jobs, task_id=None): + """This function returns the first and last index for the files for the current job ID. + If no job id is set (e.g., because a sub-job is executed locally), it simply returns all indices.""" + + if number_of_parallel_jobs is None or number_of_parallel_jobs == 1: + return None + + # test if the 'SEG_TASK_ID' environment is set + sge_task_id = os.getenv('SGE_TASK_ID') if task_id is None else task_id + if sge_task_id is None: + # task id is not set, so this function is not called from a grid job + # hence, we process the whole list + return (0,len(list_to_split)) + else: + job_id = int(sge_task_id) - 1 + # compute number of files to be executed + number_of_objects_per_job = int(math.ceil(float(len(list_to_split) / float(number_of_parallel_jobs)))) + start = job_id * number_of_objects_per_job + end = min((job_id + 1) * number_of_objects_per_job, len(list_to_split)) + return (start, end) + + def annotate_common_options(func): @click.option( "--annotator", diff --git a/bob/bio/base/script/baseline.py b/bob/bio/base/script/baseline.py deleted file mode 100644 index c5c4cbae..00000000 --- a/bob/bio/base/script/baseline.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - -""" -A script to run biometric recognition baselines -""" - - -from .. import load_resource -import os -from ..baseline import get_available_databases, search_preprocessor -from bob.extension.scripts.click_helper import ( - verbosity_option, log_parameters) -import click -import tempfile -import logging - -logger = logging.getLogger("bob.bio.base") - - -EPILOG = '''\b -Example: - $ bob bio baseline eigenface atnt -vvv - -which will run the eigenface baseline (from bob.bio.face) on the atnt -database. -''' - - -@click.command(context_settings={'ignore_unknown_options': True, - 'allow_extra_args': True}, epilog=EPILOG) -@click.argument('baseline', required=True) -@click.argument('database', required=True) -@click.option('--parallel-training', default='verify', show_default=True, - type=click.Choice(('verify', 'gmm', 'isv', 'ivector')), - help='Which script to use for training the algorithm. Some ' - 'algorithms would train more efficiently using a different ' - 'script.') -@verbosity_option() -@click.pass_context -def baseline(ctx, baseline, database, parallel_training, **kwargs): - """Run a biometric recognition baseline. - - \b - Check out all baselines available by running: - `resource.py --types baseline` - and all available databases by running: - `resource.py --types database` - - This script accepts parameters accepted by verify.py as well. - See `verify.py --help` for the extra options that you can pass. - - Hint: pass `--grid demanding` to run the baseline on the SGE grid. - - Hint: pass `--temp-directory <dir>` to set the directory for temporary files - - Hint: pass `--result-directory <dir>` to set the directory for resulting score files - - """ - log_parameters(logger) - - # Triggering training for each baseline/database - loaded_baseline = load_resource( - baseline, 'baseline', package_prefix="bob.bio.") - - # find the compatible preprocessor for this database - database_data = get_available_databases()[database] - db = search_preprocessor(database, loaded_baseline.preprocessors.keys()) - preprocessor = loaded_baseline.preprocessors[db] - - # this is the default sub-directory that is used - if "-T" in ctx.args or "--temp-directory" in ctx.args: - sub_directory = os.path.join(database, baseline) - else: - sub_directory = baseline - - logger.debug('Database groups are %s', database_data["groups"]) - - # call verify with newly generated config file. We will create a new config - # file to allow people to use chain-loading and further modify the - # baselines. See: https://gitlab.idiap.ch/bob/bob.bio.video/issues/12 - config = ''' -preprocessor = '{preprocessor}' -extractor = '{extractor}' -algorithm = '{algorithm}' -database = '{database}' -sub_directory = '{sub_directory}' -groups = ['{groups}'] -verbose = {verbose} -'''.format( - preprocessor=preprocessor, - extractor=loaded_baseline.extractor, - algorithm=loaded_baseline.algorithm, - database=database, - sub_directory=sub_directory, - groups="', '".join(database_data["groups"]), - verbose=ctx.meta['verbosity'], - ) - - if parallel_training == "verify": - from .verify import main - elif parallel_training == "gmm": - from bob.bio.gmm.script.verify_gmm import main - elif parallel_training == "isv": - from bob.bio.gmm.script.verify_isv import main - elif parallel_training == "ivector": - from bob.bio.gmm.script.verify_ivector import main - - algorithm = loaded_baseline.algorithm - if 'gmm' in algorithm and parallel_training != 'gmm': - logger.warning("GMM algorithms can train faster using the " - "``--parallel-training gmm`` option.") - if 'isv' in algorithm and parallel_training != 'isv': - logger.warning("ISV algorithms can train faster using the " - "``--parallel-training isv`` option.") - if 'ivector' in algorithm and parallel_training != 'ivector': - logger.warning("ivector algorithms can train faster using the " - "``--parallel-training ivector`` option.") - - with tempfile.NamedTemporaryFile(mode='w+t', prefix='{}_'.format(baseline), - suffix='.py', delete=False, dir='.') as f: - f.write(config) - f.flush() - f.seek(0) - main([f.name] + ctx.args) - click.echo("You may want to delete `{}' after the experiments are " - "finished running.".format(f.name)) diff --git a/bob/bio/base/script/collect_results.py b/bob/bio/base/script/collect_results.py deleted file mode 100644 index df08375c..00000000 --- a/bob/bio/base/script/collect_results.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Manuel Guenther <manuel.guenther@idiap.ch> -# Tue Jul 2 14:52:49 CEST 2013 - -from __future__ import print_function - -""" -This script parses through the given directory, collects all results of -verification experiments that are stored in file with the given file name. -It supports the split into development and test set of the data, as well as -ZT-normalized scores. - -All result files are parsed and evaluated. For each directory, the following -information are given in columns: - - * The Equal Error Rate of the development set - * The Equal Error Rate of the development set after ZT-Normalization - * The Half Total Error Rate of the evaluation set - * The Half Total Error Rate of the evaluation set after ZT-Normalization - * The sub-directory where the scores can be found - -The measure type of the development set can be changed to compute "HTER" or -"FAR" thresholds instead, using the --criterion option. -""" - - -import sys, os, glob -import argparse -import numpy - -import bob.core -from .. import score - -logger = bob.core.log.setup("bob.bio.base") - -def command_line_arguments(command_line_parameters): - """Parse the program options""" - - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-d', '--devel-name', dest="dev", default="scores-dev", help = "Name of the file containing the development scores") - parser.add_argument('-e', '--eval-name', dest="eval", default="scores-eval", help = "Name of the file containing the evaluation scores") - parser.add_argument('-D', '--directory', default=".", help = "The directory where the results should be collected from; might include search patterns as '*'.") - parser.add_argument('-r', '--rank', type=int, default=1, help = "The rank for which to compute RR and DIR") - parser.add_argument('-f', '--far-threshold', type=float, default=0.001, help = "The FAR threshold to be used with criterion FAR and DIR") - - parser.add_argument('-n', '--nonorm-dir', dest="nonorm", default="nonorm", help = "Directory where the unnormalized scores are found") - parser.add_argument('-z', '--ztnorm-dir', dest="ztnorm", default = "ztnorm", help = "Directory where the normalized scores are found") - parser.add_argument('-s', '--sort', action='store_true', help = "Sort the results") - parser.add_argument('-k', '--sort-key', dest='key', default = 'nonorm-dev', choices= ('nonorm-dev','nonorm-eval','ztnorm-dev','ztnorm-eval','dir'), - help = "Sort the results according to the given key") - parser.add_argument('-c', '--criterion', dest='criterion', default = 'EER', choices = ('EER', 'HTER', 'FAR', 'RR', 'DIR'), - help = "Minimize the threshold on the development set according to the given criterion") - - parser.add_argument('-o', '--output', help = "Name of the output file that will contain the EER/HTER scores") - - parser.add_argument('--self-test', action='store_true', help=argparse.SUPPRESS) - - bob.core.log.add_command_line_option(parser) - - # parse arguments - args = parser.parse_args(command_line_parameters) - - bob.core.log.set_verbosity_level(logger, args.verbose) - - return args - -class Result: - """Class for collecting the results of one experiment.""" - def __init__(self, dir, args): - self.dir = dir - self.m_args = args - self.nonorm_dev = None - self.nonorm_eval = None - self.ztnorm_dev = None - self.ztnorm_eval = None - - def _calculate(self, dev_file, eval_file = None): - """Calculates the EER and HTER or FRR based on the threshold criterion.""" - if self.m_args.criterion in ("RR", "DIR"): - scores_dev = score.cmc(dev_file) - if eval_file is not None: - scores_eval = score.cmc(eval_file) - - if self.m_args.criterion == "DIR": - # get negatives without positives - negatives = [max(neg) for neg, pos in scores_dev if (pos is None or not numpy.array(pos).size) and neg is not None] - if not negatives: - raise ValueError("There need to be at least one pair with only negative scores") - threshold = bob.measure.far_threshold(negatives, [], self.m_args.far_threshold) - DIR_dev = bob.measure.detection_identification_rate(scores_dev, threshold, self.m_args.rank) - if eval_file is not None: - # re-compute the threshold for eval file - negatives = [max(neg) for neg, pos in scores_eval if (pos is None or not numpy.array(pos).size) and neg is not None] - if not negatives: - raise ValueError("There need to be at least one pair with only negative scores") - threshold = bob.measure.far_threshold(negatives, [], self.m_args.far_threshold) - DIR_eval = bob.measure.detection_identification_rate(scores_eval, threshold, self.m_args.rank) - else: - DIR_eval = None - return (DIR_dev, DIR_eval) - - else: - # Recognition Rate - RR_dev = bob.measure.recognition_rate(scores_dev) - RR_eval = None if eval_file is None else bob.measure.recognition_rate(scores_eval) - return (RR_dev, RR_eval) - - else: - - dev_neg, dev_pos = score.split(dev_file) - - # switch which threshold function to use - if self.m_args.criterion == 'EER': - threshold = bob.measure.far_threshold(dev_neg, dev_pos) - elif self.m_args.criterion == 'HTER': - threshold = bob.measure.min_hter_threshold(dev_neg, dev_pos) - elif self.m_args.criterion == 'FAR': - threshold = bob.measure.far_threshold(dev_neg, dev_pos, self.m_args.far_threshold) - else: - raise NotImplementedError("Criterion %s is not yet implemented", self.m_args.criterion) - - # compute far and frr for the given threshold - dev_far, dev_frr = bob.measure.farfrr(dev_neg, dev_pos, threshold) - dev_hter = (dev_far + dev_frr)/2.0 - - if eval_file: - eval_neg, eval_pos = score.split(eval_file) - eval_far, eval_frr = bob.measure.farfrr(eval_neg, eval_pos, threshold) - eval_hter = (eval_far + eval_frr)/2.0 - else: - eval_hter = None - eval_frr = None - - if self.m_args.criterion == 'FAR': - return (dev_frr, eval_frr) - else: - return (dev_hter, eval_hter) - - def nonorm(self, dev_file, eval_file = None): - self.nonorm_dev, self.nonorm_eval = self._calculate(dev_file, eval_file) - - def ztnorm(self, dev_file, eval_file = None): - self.ztnorm_dev, self.ztnorm_eval = self._calculate(dev_file, eval_file) - - def valid(self): - return any(a is not None for a in [self.nonorm_dev, self.ztnorm_dev, self.nonorm_eval, self.ztnorm_eval]) - - def __str__(self): - str = "" - for v in [self.nonorm_dev, self.ztnorm_dev, self.nonorm_eval, self.ztnorm_eval]: - if v is not None: - val = "% 2.3f%%"%(v*100) - else: - val = "None" - cnt = 16-len(val) - str += " "*cnt + val - str += " %s"%self.dir - return str[5:] - - -def add_results(args, nonorm, ztnorm = None): - """Adds results of the given nonorm and ztnorm directories.""" - r = Result(os.path.dirname(nonorm).replace(args.directory+"/", ""), args) - logger.info("Adding results from directory '%s'", r.dir) - - # check if the results files are there - dev_file = os.path.join(nonorm, args.dev) - eval_file = os.path.join(nonorm, args.eval) - if os.path.isfile(dev_file): - if os.path.isfile(eval_file): - r.nonorm(dev_file, eval_file) - else: - r.nonorm(dev_file) - - if ztnorm: - dev_file = os.path.join(ztnorm, args.dev) - eval_file = os.path.join(ztnorm, args.eval) - if os.path.isfile(dev_file): - if os.path.isfile(eval_file): - r.ztnorm(dev_file, eval_file) - else: - r.ztnorm(dev_file) - - return r - - -def recurse(args, path): - """Recurse the directory structure and collect all results that are stored in the desired file names.""" - dir_list = os.listdir(path) - results = [] - - # check if the score directories are included in the current path - if args.nonorm in dir_list or args.nonorm == '.': - if args.ztnorm in dir_list or args.ztnorm == '.': - return results + [add_results(args, os.path.join(path, args.nonorm), os.path.join(path, args.ztnorm))] - else: - return results + [add_results(args, os.path.join(path, args.nonorm))] - - for e in dir_list: - real_path = os.path.join(path, e) - if os.path.isdir(real_path): - r = recurse(args, real_path) - if r is not None: - results += r - - return results - - -def table(results): - """Generates a table containing all results in a nice format.""" - A = " "*2 + 'dev nonorm'+ " "*5 + 'dev ztnorm' + " "*6 + 'eval nonorm' + " "*4 + 'eval ztnorm' + " "*12 + 'directory\n' - A += "-"*100+"\n" - for r in results: - if r.valid(): - A += str(r) + "\n" - return A - - -def main(command_line_parameters = None): - """Iterates through the desired directory and collects all result files.""" - args = command_line_arguments(command_line_parameters) - - results = [] - # collect results - directories = glob.glob(args.directory) - for directory in directories: - r = recurse(args, directory) - if r is not None: - results += r - - # sort results if desired - if args.sort: - import operator - results.sort(key=operator.attrgetter(args.key.replace('-','_'))) - - # print the results - if args.self_test: - table(results) - elif args.output: - f = open(args.output, "w") - f.writelines(table(results)) - f.close() - else: - print (table(results)) diff --git a/bob/bio/base/script/enroll.py b/bob/bio/base/script/enroll.py deleted file mode 100644 index 842c9347..00000000 --- a/bob/bio/base/script/enroll.py +++ /dev/null @@ -1,62 +0,0 @@ -"""This script can be used to enroll a model from several features using the given algorithm. -""" - -import argparse -import bob.core -logger = bob.core.log.setup("bob.bio.base") - -import bob.bio.base - - -def command_line_arguments(command_line_parameters): - """Parse the program options""" - - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-a', '--algorithm', metavar = 'x', nargs = '+', required = True, help = 'Biometric recognition; registered algorithms are: %s' % bob.bio.base.resource_keys('algorithm')) - parser.add_argument('-e', '--extractor', metavar = 'x', nargs = '+', required = True, help = 'Feature extraction; registered feature extractors are: %s' % bob.bio.base.resource_keys('extractor')) - parser.add_argument('-P', '--projector-file', metavar = 'FILE', help = 'The pre-trained extractor file, if the algorithm performs projection') - parser.add_argument('-E', '--enroller-file', metavar = 'FILE', help = 'The pre-trained enroller file, if the extractor requires enroller training') - parser.add_argument('-i', '--input-files', metavar = 'FEATURE', nargs='+', required = True, help = "A list of feature files to enroll the model from") - parser.add_argument('-o', '--output-file', metavar = 'MODEL', default = 'model.hdf5', help = "The file to write the enrolled model into (should be of type HDF5)") - - # add verbose option - bob.core.log.add_command_line_option(parser) - # parse arguments - args = parser.parse_args(command_line_parameters) - # set verbosity level - bob.core.log.set_verbosity_level(logger, args.verbose) - - return args - - -def main(command_line_parameters=None): - """Preprocesses the given image with the given preprocessor.""" - args = command_line_arguments(command_line_parameters) - - logger.debug("Loading extractor") - extractor = bob.bio.base.load_resource(' '.join(args.extractor), "extractor") - - logger.debug("Loading algorithm") - algorithm = bob.bio.base.load_resource(' '.join(args.algorithm), "algorithm") - if algorithm.requires_projector_training: - if args.projector_file is None: - raise ValueError("The desired algorithm requires a pre-trained projector file, but it was not specified") - algorithm.load_projector(args.projector_file) - - if algorithm.requires_enroller_training: - if args.enroller_file is None: - raise ValueError("The desired algorithm requires a pre-trained enroller file, but it was not specified") - algorithm.load_enroller(args.enroller_file) - - logger.debug("Loading %d features for enrollment", len(args.input_files)) - features = [extractor.read_feature(f) for f in args.input_files] - if algorithm.use_projected_features_for_enrollment: - logger.debug("Projecting enrollment features") - features = [algorithm.project(f) for f in features] - - logger.debug("Enrolling model") - model = algorithm.enroll(features) - algorithm.write_model(model, args.output_file) - logger.info("Wrote model to file '%s'", args.output_file) diff --git a/bob/bio/base/script/extract.py b/bob/bio/base/script/extract.py deleted file mode 100644 index 549d0579..00000000 --- a/bob/bio/base/script/extract.py +++ /dev/null @@ -1,52 +0,0 @@ -"""This script can be used to extract features using the given extractor from the given preprocessed image. -""" - -import argparse -import bob.core -logger = bob.core.log.setup("bob.bio.base") - -import bob.bio.base - - -def command_line_arguments(command_line_parameters): - """Parse the program options""" - - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-e', '--extractor', metavar = 'x', nargs = '+', required = True, help = 'Feature extraction; registered feature extractors are: %s' % bob.bio.base.resource_keys('extractor')) - parser.add_argument('-E', '--extractor-file', metavar = 'FILE', help = "The pre-trained extractor file, if the extractor requires training") - parser.add_argument('-p', '--preprocessor', metavar = 'x', nargs = '+', required = True, help = 'Data preprocessing; registered preprocessors are: %s' % bob.bio.base.resource_keys('preprocessor')) - parser.add_argument('-i', '--input-file', metavar = 'PREPROCESSED', required = True, help = "The preprocessed data file to read.") - parser.add_argument('-o', '--output-file', metavar = 'FEATURE', default = 'extracted.hdf5', help = "The file to write the extracted features into (should be of type HDF5)") - - # add verbose option - bob.core.log.add_command_line_option(parser) - # parse arguments - args = parser.parse_args(command_line_parameters) - # set verbosity level - bob.core.log.set_verbosity_level(logger, args.verbose) - - return args - - -def main(command_line_parameters=None): - """Preprocesses the given image with the given preprocessor.""" - args = command_line_arguments(command_line_parameters) - - logger.debug("Loading preprocessor") - preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor") - logger.debug("Loading extractor") - extractor = bob.bio.base.load_resource(' '.join(args.extractor), "extractor") - if extractor.requires_training: - if args.extractor_file is None: - raise ValueError("The desired extractor requires a pre-trained extractor file, but it was not specified") - extractor.load(args.extractor_file) - - logger.debug("Loading preprocessed data from file '%s'", args.input_file) - preprocessed = preprocessor.read_data(args.input_file) - - logger.info("Extracting features") - extracted = extractor(preprocessed) - extractor.write_feature(extracted, args.output_file) - logger.info("Wrote extracted features to file '%s'", args.output_file) diff --git a/bob/bio/base/script/fuse_scores.py b/bob/bio/base/script/fuse_scores.py deleted file mode 100755 index 220791dc..00000000 --- a/bob/bio/base/script/fuse_scores.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Laurent El Shafey <laurent.el-shafey@idiap.ch> -# Elie El Khoury <elie.khoury@idiap.ch> -# Manuel Guenther <siebenkopf@googlemail.com> -# Mon 13 Jul 11:55:34 CEST 2015 - -"""This script fuses scores from various systems, from a score file in four or five column format. - -Note: The score file has to contain the exact probe file names as the 3rd (4column) or 4th (5column) column. -The resulting fused score files will be written in 4 column format. -""" - - - -import bob, os, sys -import bob.learn.linear - -import bob.core -from .. import score -logger = bob.core.log.setup("bob.bio.base") - -def parse_command_line(command_line_options): - """Parse the program options""" - - usage = 'usage: %s [arguments]' % os.path.basename(sys.argv[0]) - - import argparse - parser = argparse.ArgumentParser(usage=usage, description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - # This option is not normally shown to the user... - parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.") - parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given it must be the same number of files as the --dev-files.") - parser.add_argument('-f', '--fused-dev-file', required = True, help = 'The fused development score file in 4 column format.') - parser.add_argument('-g', '--fused-eval-file', help = 'The fused evaluation score file in 4 column format.') - parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'), help = "The style of the resulting score files. The default fits to the usual output of score files.") - - parser.add_argument('-m', '--max-iterations', type=int, default=10000, help = "Select the maximum number of iterations for the LLR training") - parser.add_argument('-t', '--convergence-threshold', type=float, default=1e-10, help = "Select the convergence threshold for the LLR training") - parser.add_argument('-n', '--no-whitening', action="store_true", help = "If given, disable the score mean/std-normalization prior to fusion (this is not recommended)") - - # enable logging - bob.core.log.add_command_line_option(parser) - args = parser.parse_args(command_line_options) - bob.core.log.set_verbosity_level(logger, args.verbose) - - if args.eval_files is not None and len(args.eval_files) != len(args.dev_files): - raise ValueError("When --eval-files are specified, there need to be exactly one eval file for each dev file") - - if args.eval_files is not None and args.fused_eval_file is None: - raise ValueError("When --eval-files are specified, the --fused-eval-file needs to be given, too") - - return args - - -def main(command_line_options = None): - """Score Fusion using Logistic regression""" - args = parse_command_line(command_line_options) - - # read data - n_systems = len(args.dev_files) - for i in range(n_systems): - if not os.path.isfile(args.dev_files[i]): raise IOError("The given score file does not exist") - - # collect training data from development sets - data = [] - for i in range(n_systems): - logger.info("Loading development set score file '%s'", args.dev_files[i]) - # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)" - data.append({'4column' : score.split_four_column, '5column' : score.split_five_column}[args.parser](args.dev_files[i])) - import numpy - - trainer = bob.learn.linear.CGLogRegTrainer(0.5, args.convergence_threshold, args.max_iterations, mean_std_norm=not args.no_whitening) - data_neg = numpy.vstack(data[k][0] for k in range(n_systems)).T - data_pos = numpy.vstack(data[k][1] for k in range(n_systems)).T - machine = trainer.train(data_neg, data_pos) - - # fuse development scores - gen_data_dev = [] - for i in range(n_systems): - logger.info("Loading development set score file '%s'", args.dev_files[i]) - gen_data_dev.append({'4column' : score.four_column, '5column' : score.five_column}[args.parser](args.dev_files[i])) - - logger.info("Writing fused development set score file '%s'", args.fused_dev_file) - outf = open(args.fused_dev_file, 'w') - for line in gen_data_dev[0]: - claimed_id = line[0] - real_id = line[-3] - test_label = line[-2] - scores= [ line[-1] ] - for n in range(1, n_systems): - scores.append(next(gen_data_dev[n])[-1]) - scores = numpy.array([scores], dtype=numpy.float64) - s_fused = machine.forward(scores)[0,0] - line = claimed_id + " " + real_id + " " + test_label + " " + str(s_fused) + "\n" - outf.write(line) - - # fuse evaluation scores - if args.eval_files is not None: - gen_data_eval = [] - for i in range(n_systems): - logger.info("Loading evaluation set score file '%s'", args.eval_files[i]) - gen_data_eval.append({'4column' : score.four_column, '5column' : score.five_column}[args.parser](args.eval_files[i])) - - logger.info("Writing fused evaluation set score file '%s'", args.fused_eval_file) - outf = open(args.fused_eval_file, 'w') - for line in gen_data_eval[0]: - claimed_id = line[0] - real_id = line[-3] - test_label = line[-2] - scores= [ line[-1] ] - for n in range(1, n_systems): - scores.append(next(gen_data_eval[n])[-1]) - scores = numpy.array([scores], dtype=numpy.float64) - s_fused = machine.forward(scores)[0,0] - line = claimed_id + " " + real_id + " " + test_label + " " + str(s_fused) + "\n" - outf.write(line) diff --git a/bob/bio/base/script/grid_search.py b/bob/bio/base/script/grid_search.py deleted file mode 100755 index 975843f5..00000000 --- a/bob/bio/base/script/grid_search.py +++ /dev/null @@ -1,442 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Manuel Guenther <Manuel.Guenther@idiap.ch> - -from __future__ import print_function - -from . import verify - -import argparse, os, sys -import copy # for deep copies of dictionaries -from .. import utils, tools -from ..tools import is_idiap - -import bob.core -logger = bob.core.log.setup("bob.bio.base") - -# the configuration read from config file -global configuration -# the place holder key given on command line -global place_holder_key -# the extracted command line arguments -global args -# the job ids as returned by the call to the verify function -global job_ids -# first fake job id (useful for the --dry-run option) -global fake_job_id -fake_job_id = 0 -# the number of grid jobs that are executed -global job_count -# the total number of experiments run -global task_count -# the directories, where score files will be generated -global score_directories - - -# The different steps of the processing chain. -# Use these keywords to change parameters of the specific part -steps = ['preprocess', 'extract', 'project', 'enroll', 'score'] - - -def command_line_options(command_line_parameters): - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-c', '--configuration-file', required = True, - help = 'The file containing the information what parameters you want to have tested.') - - parser.add_argument('-k', '--place-holder-key', default = '#', - help = 'The place holder key that starts the place holders which will be replaced.') - - parser.add_argument('-d', '--database', required = True, - help = 'The database that you want to execute the experiments on.') - - parser.add_argument('-P', '--protocol', - help = 'The protocol that you want to use (if not specified, the default protocol for the database is used).') - - parser.add_argument('-s', '--sub-directory', required = True, - help = 'The sub-directory where the files of the current experiment should be stored. Please specify a directory name with a name describing your experiment.') - - parser.add_argument('-p', '--preprocessor', - help = "The preprocessing to be used (will overwrite the 'preprocessor' in the configuration file)") - - parser.add_argument('-e', '--extractor', - help = "The features to be extracted (will overwrite the 'extractor' in the configuration file)") - - parser.add_argument('-a', '--algorithm', - help = "The recognition algorithms to be employed (will overwrite the 'algorithm' in the configuration file)") - - parser.add_argument('-g', '--grid', - help = 'The SGE grid configuration') - - parser.add_argument('-l', '--parallel', type=int, - help = 'Run the algorithms in parallel on the local machine, using the given number of parallel threads') - - parser.add_argument('-L', '--gridtk-database-split-level', metavar='LEVEL', type=int, default=-1, - help = 'Split the gridtk databases after the following level -1 - never split; 0 - preprocess; 1 - extract; 2 -- project; 3 -- enroll; 4 -- score;') - - parser.add_argument('-x', '--executable', metavar='X', - help = '(optional) The executable to be executed instead of bob/bio/base/verify.py (which is taken *always* from bob.bio.base, not from the bin directory)') - - parser.add_argument('-R', '--result-directory', metavar='DIR', - help = 'The directory where to write the resulting score files to.') - - parser.add_argument('-T', '--temp-directory', metavar='DIR', - help = 'The directory where to write temporary files into.') - - parser.add_argument('-i', '--preprocessed-directory', metavar='DIR', - help = '(optional) The directory where to read the already preprocessed data from (no preprocessing is performed in this case).') - - parser.add_argument('-G', '--gridtk-database-directory', metavar='DIR', default = 'grid_db', - help = 'Directory where the submitted.sql3 files should be written into (will create sub-directories on need)') - - parser.add_argument('-q', '--dry-run', action='store_true', - help = 'Just write the commands to console and mimic dependencies, but do not execute the commands') - - parser.add_argument('-j', '--skip-when-existent', action='store_true', - help = 'Skip the submission/execution of jobs when the result directory already exists') - - parser.add_argument('-N', '--replace-variable', - help = 'Use the given variable instead of the "replace" keyword in the configuration file') - - parser.add_argument('parameters', nargs = argparse.REMAINDER, - help = "Parameters directly passed to the verify.py script. Use -- to separate this parameters from the parameters of this script. See 'verify.py --help' for a complete list of options.") - - bob.core.log.add_command_line_option(parser) - - global args - args = parser.parse_args(command_line_parameters) - bob.core.log.set_verbosity_level(logger, args.verbose) - - # set base directories - if args.temp_directory is None: - args.temp_directory = "/idiap/temp/%s/grid_search" % os.environ["USER"] if is_idiap() else "temp/grid_search" - if args.result_directory is None: - args.result_directory = "/idiap/user/%s/grid_search" % os.environ["USER"] if is_idiap() else "results/grid_search" - - - if args.executable: - global verify - verify = __import__('importlib').import_module(args.executable) - - - - -def extract_values(replacements, indices): - """Extracts the value dictionary from the given dictionary of replacements""" - extracted_values = {} - for place_holder in replacements.keys(): - # get all occurrences of the place holder key - parts = place_holder.split(place_holder_key) - # only one part -> no place holder key found -> no strings to be extracted - if len(parts) == 1: - continue - - keys = [part[:1] for part in parts[1:]] - - value_index = indices[place_holder] - - entries = replacements[place_holder] - entry_key = sorted(entries.keys())[value_index] - - # check that the keys are unique - for key in keys: - if key in extracted_values: - raise ValueError("The replacement key '%s' was defined multiple times. Please use each key only once."%key) - - # extract values - if len(keys) == 1: - extracted_values[keys[0]] = entries[entry_key] - - else: - for i in range(len(keys)): - extracted_values[keys[i]] = entries[entry_key][i] - - return extracted_values - - -def replace(string, replacements): - """Replaces the place holders in the given string with the according values from the values dictionary.""" - # get all occurrences of the place holder key - parts = string.split(place_holder_key) - # only one part -> no place holder key found -> return the whole string - if len(parts) == 1: - return string - - keys = [part[:1] for part in parts[1:]] - - retval = parts[0] - for i in range(0, len(keys)): - # replace the place holder by the desired string and add the remaining of the command - retval += str(replacements[keys[i]]) + str(parts[i+1][1:]) - - return retval - - -def create_command_line(replacements): - """Creates the parameters for the function call that will be given to the verify script.""" - # get the values to be replaced with - values = {} - for key in configuration.replace: - values.update(extract_values(configuration.replace[key], replacements)) - # replace the place holders with the values - call = ['--database', args.database] - if args.protocol: - call += ['--protocol', args.protocol] - call += ['--temp-directory', args.temp_directory, '--result-directory', args.result_directory] - return call + [ - '--preprocessor', replace(configuration.preprocessor, values), - '--extractor', replace(configuration.extractor, values), - '--algorithm', replace(configuration.algorithm, values), - '--imports' - ] + configuration.imports - - - -# Parts that could be skipped when the dependecies are on the indexed level -skips = [[''], - ['--skip-preprocessing'], - ['--skip-extractor-training', '--skip-extraction'], - ['--skip-projector-training', '--skip-projection'], - ['--skip-enroller-training', '--skip-enrollment'] - ] - -# The keywords to parse the job ids to get the according dependencies right -dependency_keys = ['DUMMY', 'preprocess', 'extract', 'project', 'enroll'] - - -def directory_parameters(directories): - """This function generates the verify parameters that define the directories, where the data is stored. - The directories are set such that data is reused whenever possible, but disjoint if needed.""" - def _join_dirs(index, subdir): - # collect sub-directories - dirs = [] - for i in range(index+1): - dirs += directories[steps[i]] - if not dirs: - return subdir - else: - dir = dirs[0] - for d in dirs[1:]: - dir = os.path.join(dir, d) - return os.path.join(dir, subdir) - - global args - parameters = [] - - # add directory parameters - # - preprocessing - if args.preprocessed_directory: - parameters += ['--preprocessed-directory', os.path.join(args.preprocessed_directory, _join_dirs(0, 'preprocessed'))] + skips[1] - else: - parameters += ['--preprocessed-directory', _join_dirs(0, 'preprocessed')] - - # - feature extraction - parameters += ['--extracted-directory', _join_dirs(1, 'extracted'), '--extractor-file', _join_dirs(1, 'Extractor.hdf5')] - - # - feature projection - parameters += ['--projected-directory', _join_dirs(2, 'projected'), '--projector-file', _join_dirs(2, 'Projector.hdf5')] - - # - model enrollment - parameters += ['--model-directories', _join_dirs(3, 'N-Models'), _join_dirs(3, 'T-Models'), '--enroller-file', _join_dirs(3, 'Enroller.hdf5')] - - # - scoring - parameters += ['--score-directories', _join_dirs(4, 'nonorm'), _join_dirs(4, 'ztnorm')] - - # - Experiment.info - parameters += ['--experiment-info-file', _join_dirs(4, 'Experiment.info')] - - # the sub-dorectory, given on command line - parameters += ['--sub-directory', args.sub_directory] - - global score_directories - score_directories.append(_join_dirs(4, '.')) - - # grid database - if args.grid is not None or args.parallel is not None: - # we get one database per preprocessing job (all others might have job inter-dependencies) - parameters += ['--gridtk-database-file', os.path.join(args.gridtk_database_directory, _join_dirs(args.gridtk_database_split_level, 'submitted.sql3'))] - - return parameters - - -def check_requirements(replacements): - # check if the requirement are met - global configuration - values = {} - for key in configuration.replace: - # check that the key is one of the known steps - if key not in steps: - raise ValueError("The step '%s' defined in the configuration file '%s' is unknown; choose one of %s" % (key, args.configuration_file, steps)) - values.update(extract_values(configuration.replace[key], replacements)) - for requirement in configuration.requirements: - test = replace(requirement, values) - while not isinstance(test, bool): - test = eval(test) - if not test: - return False - return True - - -def execute_dependent_task(command_line, directories, dependency_level): - # add other command line arguments - if args.grid: - command_line += ['--grid', args.grid, '--stop-on-failure'] - if args.parallel: - command_line += ['--parallel', str(args.parallel)] - - if args.verbose: - command_line += ['-' + 'v'*args.verbose] - - # create directory parameters - command_line += directory_parameters(directories) - - # add skip parameters according to the dependency level - for i in range(1, dependency_level+1): - command_line += skips[i] - - if args.parameters is not None: - command_line += args.parameters[1:] - - # extract dependencies - global job_ids - dependencies = [] - for k in sorted(job_ids.keys()): - for i in range(1, dependency_level+1): - if k.find(dependency_keys[i]) != -1: - dependencies.append(job_ids[k]) - - # add dependencies - if dependencies: - command_line += ['--external-dependencies'] + [str(d) for d in sorted(list(set(dependencies)))] - - # execute the command - new_job_ids = {} - try: - verif_args = verify.parse_arguments(command_line) - result_dirs = [os.path.join(verif_args.result_directory, verif_args.database.protocol, verif_args.score_directories[i]) for i in ((0,1) if verif_args.zt_norm else (0,))] - if not args.skip_when_existent or not all(os.path.exists(result_dir) for result_dir in result_dirs): - # get the command line parameter for the result directory - if args.dry_run: - if args.verbose: - print ("Would have executed job", tools.command_line(command_line)) - else: - # execute the verification experiment - global fake_job_id - new_job_ids = verify.verify(verif_args, command_line, external_fake_job_id = fake_job_id) - else: - logger.info("Skipping execution of %s since result directories '%s' already exists", tools.command_line(command_line), result_dirs) - - except Exception as e: - logger.error("The execution of job was rejected!\n%s\n Reason:\n%s", tools.command_line(command_line), e) - - # some statistics - global job_count, task_count - job_count += len(new_job_ids) - task_count += 1 - fake_job_id += 100 - job_ids.update(new_job_ids) - - -def create_recursive(replace_dict, step_index, directories, dependency_level, keys=[]): - """Iterates through all the keywords and replaces all place holders with all keywords in a defined order.""" - - # check if we are at the lowest level - if step_index == len(steps): - # create a call and execute it - if check_requirements(replace_dict): - execute_dependent_task(create_command_line(replace_dict), directories, dependency_level) - else: - if steps[step_index] not in directories: - directories[steps[step_index]] = [] - - # we are at another level - if steps[step_index] not in configuration.replace.keys(): - # nothing to be replaced here, so just go to the next level - create_recursive(replace_dict, step_index+1, directories, dependency_level) - else: - # iterate through the keys - if keys == []: - # call this function recursively by defining the set of keys that we need - create_recursive(replace_dict, step_index, directories, dependency_level, keys = sorted(configuration.replace[steps[step_index]].keys())) - else: - # create a deep copy of the replacement dict to be able to modify it - replace_dict_copy = copy.deepcopy(replace_dict) - directories_copy = copy.deepcopy(directories) - # iterate over all replacements for the first of the keys - key = keys[0] - replacement_directories = sorted(configuration.replace[steps[step_index]][key]) - directories_copy[steps[step_index]].append("") - new_dependency_level = dependency_level - for replacement_index in range(len(replacement_directories)): - # increase the counter of the current replacement - replace_dict_copy[key] = replacement_index - directories_copy[steps[step_index]][-1] = replacement_directories[replacement_index] - # call the function recursively - if len(keys) == 1: - # we have to go to the next level - create_recursive(replace_dict_copy, step_index+1, directories_copy, new_dependency_level) - else: - # we have to subtract the keys - create_recursive(replace_dict_copy, step_index, directories_copy, new_dependency_level, keys = keys[1:]) - new_dependency_level = step_index - - -def main(command_line_parameters = None): - """Main entry point for the parameter test. Try --help to see the parameters that can be specified.""" - - global task_count, job_count, job_ids, score_directories - job_count = 0 - task_count = 0 - job_ids = {} - score_directories = [] - - command_line_options(command_line_parameters) - - global configuration, place_holder_key - configuration = utils.read_config_file([args.configuration_file]) - place_holder_key = args.place_holder_key - - if args.preprocessor: - configuration.preprocessor = args.preprocessor - if args.extractor: - configuration.extractor = args.extractor - if args.algorithm: - configuration.algorithm = args.algorithm - - if args.replace_variable is not None: - exec("configuration.replace = configuration.%s" % args.replace_variable) - - for attribute in ('preprocessor', 'extractor', 'algorithm'): - if not hasattr(configuration, attribute): - raise ValueError("The given configuration file '%s' does not contain the required attribute '%s', and it was not given on command line either" %(args.configuration_file, attribute)) - - # extract the dictionary of replacements from the configuration - if not hasattr(configuration, 'replace'): - raise ValueError("Please define a set of replacements using the 'replace' keyword.") - if not hasattr(configuration, 'imports'): - configuration.imports = ['bob.bio.base'] - logger.info("No 'imports' specified in configuration file '%s' -> using default %s", args.configuration_file, configuration.imports) - - if not hasattr(configuration, 'requirements'): - configuration.requirements = [] - - replace_dict = {} - for step, replacements in configuration.replace.items(): - for key in replacements.keys(): - if key in replace_dict: - raise ValueError("The replacement key '%s' was defined multiple times. Please use each key only once.") - # we always start with index 0. - replace_dict[key] = 0 - - # now, iterate through the list of replacements and create the according calls - create_recursive(replace_dict, step_index = 0, directories = {}, dependency_level = 0) - - # finally, write some information about the - if args.grid is not None: - logger.info("The number of executed tasks is: %d, which are split up into %d jobs that are executed in the grid" , task_count, job_count) - - if args.parallel is not None: - logger.info("The total amount of finsihed tasks is: %d", task_count) - - return score_directories diff --git a/bob/bio/base/script/preprocess.py b/bob/bio/base/script/preprocess.py deleted file mode 100644 index 7aef2986..00000000 --- a/bob/bio/base/script/preprocess.py +++ /dev/null @@ -1,64 +0,0 @@ -"""This script can be used to preprocess a single data file with a given preprocessor. -""" - -import argparse -import bob.core -logger = bob.core.log.setup("bob.bio.base") - -import bob.bio.base -from bob.bio.base.database.file import BioFile -import bob.db.base -import numpy - -import bob.core -import bob.io.base -try: - import bob.io.image -except ImportError: - pass - - -def command_line_arguments(command_line_parameters): - """Parse the program options""" - - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-p', '--preprocessor', metavar = 'x', nargs = '+', required = True, help = 'Data preprocessing; registered preprocessors are: %s' % bob.bio.base.resource_keys('preprocessor')) - parser.add_argument('-i', '--input-file', metavar = 'FILE', required = True, help = "The data file to be preprocessed.") -# parser.add_argument('-a', '--annotations', nargs='+', help = "Key=value-pairs for the annotations") - parser.add_argument('-a', '--annotation-file', metavar = 'FILE', help = "The annotation file for the given data file, if applicable and/or available; currently the only supported format is the 'named' annotation format.") - parser.add_argument('-o', '--output-file', metavar = 'PREPROCESSED', default = 'preprocessed.hdf5', help = "Write the preprocessed data into this file (should be of type HDF5)") - parser.add_argument('-c', '--convert-as-image', metavar = 'IMAGE', help = "Write the preprocessed data into this image file, converting it to an image, if possible") - - # add verbose option - bob.core.log.add_command_line_option(parser) - - # parse arguments - args = parser.parse_args(command_line_parameters) - - # set verbosity level - bob.core.log.set_verbosity_level(logger, args.verbose) - - return args - -def main(command_line_parameters=None): - """Preprocesses the given image with the given preprocessor.""" - args = command_line_arguments(command_line_parameters) - - logger.debug("Loading preprocessor") - preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor") - - logger.debug("Loading input data from file '%s'%s", args.input_file, " and '%s'" % args.annotation_file if args.annotation_file is not None else "") - data = preprocessor.read_original_data(BioFile(1, args.input_file, 2), "", "") - annotations = bob.db.base.annotations.read_annotation_file(args.annotation_file, 'named') if args.annotation_file is not None else None - - logger.info("Preprocessing data") - preprocessed = preprocessor(data, annotations) - preprocessor.write_data(preprocessed, args.output_file) - logger.info("Wrote preprocessed data to file '%s'", args.output_file) - - if args.convert_as_image is not None: - converted = bob.core.convert(preprocessed, 'uint8', dest_range=(0,255), source_range=(numpy.min(preprocessed), numpy.max(preprocessed))) - bob.io.base.save(converted, args.convert_as_image) - logger.info("Wrote preprocessed data to image file '%s'", args.convert_as_image) diff --git a/bob/bio/base/script/score.py b/bob/bio/base/script/score.py deleted file mode 100644 index 3432ffa7..00000000 --- a/bob/bio/base/script/score.py +++ /dev/null @@ -1,66 +0,0 @@ -"""This script can be used to compute scores between a list of enrolled models and a list of probe files. -""" - -from __future__ import print_function - -import argparse -import bob.core -logger = bob.core.log.setup("bob.bio.base") - -import bob.bio.base - - -def command_line_arguments(command_line_parameters): - """Parse the program options""" - - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-a', '--algorithm', metavar = 'x', nargs = '+', required = True, help = 'Biometric recognition; registered algorithms are: %s' % bob.bio.base.resource_keys('algorithm')) - parser.add_argument('-e', '--extractor', metavar = 'x', nargs = '+', required = True, help = 'Feature extraction; registered feature extractors are: %s' % bob.bio.base.resource_keys('extractor')) - parser.add_argument('-P', '--projector-file', metavar = 'FILE', help = 'The pre-trained projector file, if the algorithm performs projection') - parser.add_argument('-E', '--enroller-file' , metavar = 'FILE', help = 'The pre-trained enroller file, if the extractor requires enroller training') - parser.add_argument('-m', '--model-files', metavar = 'MODEL', nargs='+', required = True, help = "A list of enrolled model files") - parser.add_argument('-p', '--probe-files', metavar = 'PROBE', nargs='+', required = True, help = "A list of extracted feature files used as probes") - - # add verbose option - bob.core.log.add_command_line_option(parser) - # parse arguments - args = parser.parse_args(command_line_parameters) - # set verbosity level - bob.core.log.set_verbosity_level(logger, args.verbose) - - return args - - -def main(command_line_parameters=None): - """Preprocesses the given image with the given preprocessor.""" - args = command_line_arguments(command_line_parameters) - - logger.debug("Loading extractor") - extractor = bob.bio.base.load_resource(' '.join(args.extractor), "extractor") - logger.debug("Loading algorithm") - algorithm = bob.bio.base.load_resource(' '.join(args.algorithm), "algorithm") - if algorithm.requires_projector_training: - if args.projector_file is None: - raise ValueError("The desired algorithm requires a pre-trained projector file, but it was not specified") - algorithm.load_projector(args.projector_file) - - if algorithm.requires_enroller_training: - if args.enroller_file is None: - raise ValueError("The desired algorithm requires a pre-trained enroller file, but it was not specified") - algorithm.load_enroller(args.enroller_file) - - models, probes = {}, {} - logger.debug("Loading %d models", len(args.model_files)) - for m in args.model_files: models[m] = algorithm.read_model(m) - logger.debug("Loading %d probes", len(args.probe_files)) - for p in args.probe_files: probes[p] = extractor.read_feature(p) - if algorithm.performs_projection: - logger.debug("Projecting %d probes", len(args.probe_files)) - for p in probes: probes[p] = algorithm.project(probes[p]) - - logger.info("Computing scores") - for p in args.probe_files: - for m in args.model_files: - print("Score between model '%s' and probe '%s' is %3.8f" % (m, p, algorithm.score(models[m], probes[p]))) diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py new file mode 100644 index 00000000..0d5ea0c0 --- /dev/null +++ b/bob/bio/base/script/vanilla_biometrics.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> + + +"""Executes biometric pipeline""" + +import os +import functools + +import click + +from bob.extension.scripts.click_helper import verbosity_option, ResourceOption, ConfigCommand + + +EPILOG = """\b + + + Command line examples\n + ----------------------- + + + $ bob pipelines vanilla-biometrics my_experiment.py -vv + + + my_experiment.py must contain the following elements: + + >>> preprocessor = my_preprocessor() \n + >>> extractor = my_extractor() \n + >>> algorithm = my_algorithm() \n + >>> checkpoints = EXPLAIN CHECKPOINTING \n + +\b + + +Look at the following example + + $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \ + ./bob/pipelines/config/database/mobio_male.py \ + ./bob/pipelines/config/baselines/facecrop_pca.py + +\b + + + +TODO: Work out this help + +""" + + +@click.command( + entry_point_group='bob.pipelines.config', cls=ConfigCommand, + epilog=EPILOG, +) +@click.option( + "--preprocessor", + "-p", + required=True, + cls=ResourceOption, + entry_point_group="bob.pipelines.preprocessors", # This should be linked to bob.bio.base + help="Data preprocessing algorithm", +) +@click.option( + "--extractor", + "-e", + required=True, + cls=ResourceOption, + entry_point_group="bob.pipelines.extractor", # This should be linked to bob.bio.base + help="Feature extraction algorithm", +) +@click.option( + "--algorithm", + "-a", + required=True, + cls=ResourceOption, + entry_point_group="bob.pipelines.biometric_algorithm", # This should be linked to bob.bio.base + help="Biometric Algorithm (class that implements the methods: `fit`, `enroll` and `score`)", +) +@click.option( + "--database", + "-d", + required=True, + cls=ResourceOption, + entry_point_group="bob.pipelines.database", # This should be linked to bob.bio.base + help="Biometric Database connector (class that implements the methods: `background_model_samples`, `references` and `probes`)", +) +@click.option( + "--dask-client", + "-l", + required=True, + cls=ResourceOption, + entry_point_group="bob.pipelines.client", # This should be linked to bob.bio.base + help="Dask client for the execution of the pipeline.", +) +@click.option( + "--checkpointing", "-c", is_flag=True, help="Save checkpoints in this experiment?" +) +@click.option( + "--group", + "-g", + type=click.Choice(["dev", "eval"]), + multiple=True, + default=("dev",), + help="If given, this value will limit the experiments belonging to a particular protocolar group", +) +@click.option( + "-o", + "--output", + show_default=True, + default="results", + help="Name of output directory", +) +@verbosity_option(cls=ResourceOption) +def vanilla_biometrics( + preprocessor, + extractor, + algorithm, + database, + dask_client, + checkpointing, + group, + output, + **kwargs +): + """Runs the simplest biometrics pipeline. + + Such pipeline consists into three sub-pipelines. + In all of them, given raw data as input it does the following steps: + + Sub-pipeline 1:\n + --------------- + + Training background model. Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks. This sub-pipeline handles that and it consists of 3 steps: + + \b + raw_data --> preprocessing >> feature extraction >> train background model --> background_model + + + + \b + + Sub-pipeline 2:\n + --------------- + + Creation of biometric references: This is a standard step in a biometric pipelines. + Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity. This sub-pipeline handles that in 3 steps and they are the following: + + \b + raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference + + Note that this sub-pipeline depends on the previous one + + + + Sub-pipeline 3:\n + --------------- + + + Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, computes a score. Such score has different meanings depending on the scoring method your biometric algorithm uses. It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms. + + + raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score + + Note that this sub-pipeline depends on the two previous ones + + + """ + + # Always turn-on the checkpointing + checkpointing = True + + # Chooses the pipeline to run + from bob.bio.base.pipelines.vanilla_biometrics import biometric_pipeline + + if not os.path.exists(output): + os.makedirs(output) + + if checkpointing: + checkpoints = { + "background": { + "preprocessor": os.path.join(output, "background", "preprocessed"), + "extractor": os.path.join(output, "background", "extracted"), + # at least, the next stage must be provided! + "model": os.path.join(output, "background", "model"), + }, + "references": { + "preprocessor": os.path.join(output, "references", "preprocessed"), + "extractor": os.path.join(output, "references", "extracted"), + "enrolled": os.path.join(output, "references", "enrolled"), + }, + "probes": { + "preprocessor": os.path.join(output, "probes", "preprocessed"), + "extractor": os.path.join(output, "probes", "extracted"), + }, + } + + + # Defines the processing pipeline for loading samples + # Can add any number of steps! + pipeline = [("preprocessor",preprocessor), + ("extractor", extractor)] + + # Mechanism that loads samples + # from ..bob_bio.blocks import SampleLoader + from bob.bio.base.pipelines.annotated_blocks import SampleLoaderAnnotated as SampleLoader + loader = SampleLoader(pipeline) + + for g in group: + + result = biometric_pipeline( + database.background_model_samples(), + database.references(group=g), + database.probes(group=g), + loader, + algorithm, + npartitions=len(dask_client.cluster.workers), + checkpoints=checkpoints, + ) + + # result.visualize(os.path.join(output, "graph.pdf"), rankdir="LR") + result = result.compute(scheduler=dask_client) + for probe in result: + for reference in probe.samples: + print(reference.subject, probe.subject, probe.path, reference.data) + + dask_client.shutdown() diff --git a/bob/bio/base/script/verify.py b/bob/bio/base/script/verify.py deleted file mode 100644 index d27c2d35..00000000 --- a/bob/bio/base/script/verify.py +++ /dev/null @@ -1,442 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Manuel Guenther <Manuel.Guenther@idiap.ch> -from __future__ import print_function - -import sys -import argparse - -import logging -logger = logging.getLogger("bob.bio.base") - -from .. import tools - - -def parse_arguments(command_line_parameters, exclude_resources_from = []): - """This function parses the given options (which by default are the command line options). If exclude_resources_from is specified (as a list), the resources from the given packages are not listed in the help message.""" - # set up command line parser - parsers = tools.command_line_parser(exclude_resources_from = exclude_resources_from) - - # Add sub-tasks that can be executed by this script - parser = parsers['main'] - parser.add_argument('--sub-task', - choices = ('preprocess', 'train-extractor', 'extract', 'train-projector', 'project', 'train-enroller', 'enroll', 'compute-scores', 'concatenate', 'calibrate'), - help = argparse.SUPPRESS) #'Executes a subtask (FOR INTERNAL USE ONLY!!!)' - parser.add_argument('--model-type', choices = ['N', 'T'], - help = argparse.SUPPRESS) #'Which type of models to generate (Normal or TModels)' - parser.add_argument('--score-type', choices = ['A', 'B', 'C', 'D', 'Z'], - help = argparse.SUPPRESS) #'The type of scores that should be computed' - parser.add_argument('--group', - help = argparse.SUPPRESS) #'The group for which the current action should be performed' - - # now that we have set up everything, get the command line arguments - args = tools.initialize(parsers, command_line_parameters, - skips = ['preprocessing', 'extractor-training', 'extraction', 'projector-training', 'projection', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']) - - # check that none of the above arguments are used without the --sub-task - if args.sub_task is None: - if args.model_type is not None: raise ValueError("The option --model-type is an internal option and cannot be used to define experiments") - if args.score_type is not None: raise ValueError("The option --score-type is an internal option and cannot be used to define experiments") - if args.group is not None: raise ValueError("The option --group is an internal option and cannot be used to define experiments; did you mean to use --groups?") - - return args - - -def add_jobs(args, submitter): - """Adds all (desired) jobs of the tool chain to the grid, or to the local list to be executed.""" - - # collect the job ids - job_ids = {} - - # if there are any external dependencies, we need to respect them - deps = args.external_dependencies[:] - - jobs_to_execute = [] - - # preprocessing - if not args.skip_preprocessing: - if args.grid is None: - jobs_to_execute.append(('preprocess',)) - else: - job_ids['preprocessing'] = submitter.submit( - '--sub-task preprocess', - number_of_parallel_jobs = args.grid.number_of_preprocessing_jobs, - dependencies = deps, - **args.grid.preprocessing_queue) - deps.append(job_ids['preprocessing']) - - # feature extraction training - if not args.skip_extractor_training and args.extractor.requires_training: - if args.grid is None: - jobs_to_execute.append(('train-extractor',)) - else: - job_ids['extractor-training'] = submitter.submit( - '--sub-task train-extractor', - name = 'train-f', - dependencies = deps, - **args.grid.training_queue) - deps.append(job_ids['extractor-training']) - - # feature extraction - if not args.skip_extraction: - if args.grid is None: - jobs_to_execute.append(('extract',)) - else: - job_ids['extraction'] = submitter.submit( - '--sub-task extract', - number_of_parallel_jobs = args.grid.number_of_extraction_jobs, - dependencies = deps, - **args.grid.extraction_queue) - deps.append(job_ids['extraction']) - - # feature projection training - if not args.skip_projector_training and args.algorithm.requires_projector_training: - if args.grid is None: - jobs_to_execute.append(('train-projector',)) - else: - job_ids['projector-training'] = submitter.submit( - '--sub-task train-projector', - name="train-p", - dependencies = deps, - **args.grid.training_queue) - deps.append(job_ids['projector-training']) - - # feature projection - if not args.skip_projection and args.algorithm.performs_projection: - if args.grid is None: - jobs_to_execute.append(('project',)) - else: - job_ids['projection'] = submitter.submit( - '--sub-task project', - number_of_parallel_jobs = args.grid.number_of_projection_jobs, - dependencies = deps, - **args.grid.projection_queue) - deps.append(job_ids['projection']) - - # model enrollment training - if not args.skip_enroller_training and args.algorithm.requires_enroller_training: - if args.grid is None: - jobs_to_execute.append(('train-enroller',)) - else: - job_ids['enroller-training'] = submitter.submit( - '--sub-task train-enroller', - name = "train-e", - dependencies = deps, - **args.grid.training_queue) - deps.append(job_ids['enroller-training']) - - # enroll models - enroll_deps_n = {} - enroll_deps_t = {} - score_deps = {} - concat_deps = {} - for group in args.groups: - enroll_deps_n[group] = deps[:] - enroll_deps_t[group] = deps[:] - if not args.skip_enrollment: - if args.grid is None: - jobs_to_execute.append(('enroll', group, 'N')) - else: - job_ids['enroll-%s-N'%group] = submitter.submit( - '--sub-task enroll --group %s --model-type N'%group, - name = "enr-N-%s"%group, - number_of_parallel_jobs = args.grid.number_of_enrollment_jobs, - dependencies = deps, - **args.grid.enrollment_queue) - enroll_deps_n[group].append(job_ids['enroll-%s-N'%group]) - - if args.zt_norm: - if args.grid is None: - jobs_to_execute.append(('enroll', group, 'T')) - else: - job_ids['enroll-%s-T'%group] = submitter.submit( - '--sub-task enroll --group %s --model-type T'%group, - name = "enr-T-%s"%group, - number_of_parallel_jobs = args.grid.number_of_enrollment_jobs, - dependencies = deps, - **args.grid.enrollment_queue) - enroll_deps_t[group].append(job_ids['enroll-%s-T'%group]) - - # compute A,B,C, and D scores - if not args.skip_score_computation: - if args.grid is None: - jobs_to_execute.append(('compute-scores', group, None, 'A')) - else: - job_ids['score-%s-A'%group] = submitter.submit( - '--sub-task compute-scores --group %s --score-type A'%group, - name = "score-A-%s"%group, - number_of_parallel_jobs = args.grid.number_of_scoring_jobs, - dependencies = enroll_deps_n[group], - **args.grid.scoring_queue) - concat_deps[group] = [job_ids['score-%s-A'%group]] - - if args.zt_norm: - if args.grid is None: - jobs_to_execute.append(('compute-scores', group, None, 'B')) - jobs_to_execute.append(('compute-scores', group, None, 'C')) - jobs_to_execute.append(('compute-scores', group, None, 'D')) - jobs_to_execute.append(('compute-scores', group, None, 'Z')) - else: - job_ids['score-%s-B'%group] = submitter.submit( - '--sub-task compute-scores --group %s --score-type B'%group, - name = "score-B-%s"%group, - number_of_parallel_jobs = args.grid.number_of_scoring_jobs, - dependencies = enroll_deps_n[group], - **args.grid.scoring_queue) - - job_ids['score-%s-C'%group] = submitter.submit( - '--sub-task compute-scores --group %s --score-type C'%group, - name = "score-C-%s"%group, - number_of_parallel_jobs = args.grid.number_of_scoring_jobs, - dependencies = enroll_deps_t[group], - **args.grid.scoring_queue) - - job_ids['score-%s-D'%group] = submitter.submit( - '--sub-task compute-scores --group %s --score-type D'%group, - name = "score-D-%s"%group, - number_of_parallel_jobs = args.grid.number_of_scoring_jobs, - dependencies = enroll_deps_t[group], - **args.grid.scoring_queue) - - # compute zt-norm - score_deps[group] = [job_ids['score-%s-A'%group], job_ids['score-%s-B'%group], job_ids['score-%s-C'%group], job_ids['score-%s-D'%group]] - job_ids['score-%s-Z'%group] = submitter.submit( - '--sub-task compute-scores --group %s --score-type Z'%group, - name = "score-Z-%s"%group, - dependencies = score_deps[group]) - concat_deps[group].extend([job_ids['score-%s-B'%group], job_ids['score-%s-C'%group], job_ids['score-%s-D'%group], job_ids['score-%s-Z'%group]]) - else: - concat_deps[group] = deps[:] - - # concatenate results - if not args.skip_concatenation: - if args.grid is None: - jobs_to_execute.append(('concatenate', group)) - else: - job_ids['concat-%s'%group] = submitter.submit( - '--sub-task concatenate --group %s'%group, - name = "concat-%s"%group, - dependencies = concat_deps[group]) - - # calibrate the scores - if args.calibrate_scores: - if args.grid is None: - jobs_to_execute.append(('calibrate',)) - else: - calib_deps = [job_ids['concat-%s'%g] for g in args.groups if 'concat-%s'%g in job_ids] - job_ids['calibrate'] = submitter.submit( - '--sub-task calibrate', - dependencies = calib_deps) - - - if args.grid is None: - # return the list of jobs that need to be executed in series - return jobs_to_execute - else: - # return the job ids, in case anyone wants to know them - return job_ids - - -def execute(args): - """Run the desired job of the tool chain that is specified on command line. - This job might be executed either in the grid, or locally.""" - # the file selector object - fs = tools.FileSelector.instance() - - if args.dry_run: - # Don't actually run the experiment, but just print out, what we would have done - parameters = "" - if args.group is not None: parameters += "group='%s' " % args.group - if args.model_type is not None: parameters += "and model-type='%s' " % args.model_type - if args.score_type is not None: parameters += "and score-type='%s' " % args.score_type - print ("Would have executed task '%s' with %s" % (args.sub_task, parameters if parameters else "no parameters")) - # return True as we pretend to have executed the task - return True - - - # preprocess the data - if args.sub_task == 'preprocess': - tools.preprocess( - args.preprocessor, - groups = tools.groups(args), - indices = tools.indices(fs.original_data_list(groups=tools.groups(args)), None if args.grid is None else args.grid.number_of_preprocessing_jobs), - allow_missing_files = args.allow_missing_files, - force = args.force) - - # train the feature extractor - elif args.sub_task == 'train-extractor': - tools.train_extractor( - args.extractor, - args.preprocessor, - allow_missing_files = args.allow_missing_files, - force = args.force) - - # extract the features - elif args.sub_task == 'extract': - tools.extract( - args.extractor, - args.preprocessor, - groups = tools.groups(args), - indices = tools.indices(fs.preprocessed_data_list(groups=tools.groups(args)), None if args.grid is None else args.grid.number_of_extraction_jobs), - allow_missing_files = args.allow_missing_files, - force = args.force) - - # train the feature projector - elif args.sub_task == 'train-projector': - tools.train_projector( - args.algorithm, - args.extractor, - allow_missing_files = args.allow_missing_files, - force = args.force) - - # project the features - elif args.sub_task == 'project': - tools.project( - args.algorithm, - args.extractor, - groups = tools.groups(args), - indices = tools.indices(fs.preprocessed_data_list(groups=tools.groups(args)), None if args.grid is None else args.grid.number_of_projection_jobs), - allow_missing_files = args.allow_missing_files, - force = args.force) - - # train the model enroller - elif args.sub_task == 'train-enroller': - tools.train_enroller( - args.algorithm, - args.extractor, - allow_missing_files = args.allow_missing_files, - force = args.force) - - # enroll the models - elif args.sub_task == 'enroll': - model_ids = fs.model_ids(args.group) if args.model_type == 'N' else fs.t_model_ids(args.group) - tools.enroll( - args.algorithm, - args.extractor, - args.zt_norm, - indices = tools.indices(model_ids, None if args.grid is None else args.grid.number_of_enrollment_jobs), - groups = [args.group], - types = [args.model_type], - allow_missing_files = args.allow_missing_files, - force = args.force) - - # compute scores - elif args.sub_task == 'compute-scores': - if args.score_type != 'Z': - model_ids = fs.model_ids(args.group) if args.score_type in ('A', 'B') else fs.t_model_ids(args.group) - tools.compute_scores( - args.algorithm, - args.extractor, - args.zt_norm, - indices = tools.indices(model_ids, None if args.grid is None else args.grid.number_of_scoring_jobs), - groups = [args.group], - types = [args.score_type], - force = args.force, - allow_missing_files = args.allow_missing_files, - write_compressed = args.write_compressed_score_files) - - else: - tools.zt_norm( - groups = [args.group], - write_compressed = args.write_compressed_score_files, - allow_missing_files = args.allow_missing_files) - - # concatenate - elif args.sub_task == 'concatenate': - tools.concatenate( - args.zt_norm, - groups = [args.group], - write_compressed = args.write_compressed_score_files, - add_model_id = args.write_five_column_score_files) - - # calibrate scores - elif args.sub_task == 'calibrate': - tools.calibrate( - args.zt_norm, - groups = args.groups, - write_compressed = args.write_compressed_score_files) - - # Test if the keyword was processed - else: - return False - return True - - - -def verify(args, command_line_parameters, external_fake_job_id = 0): - """This is the main entry point for computing verification experiments. - You just have to specify configurations for any of the steps of the toolchain, which are: - -- the database - -- the preprocessing - -- feature extraction - -- the recognition algorithm - -- and the grid configuration (in case, the function should be executed in the grid). - Additionally, you can skip parts of the toolchain by selecting proper --skip-... parameters. - If your probe files are not too big, you can also specify the --preload-probes switch to speed up the score computation. - If files should be re-generated, please specify the --force option (might be combined with the --skip-... options).""" - - - # as the main entry point, check whether the sub-task is specified - if args.sub_task is not None: - # execute the desired sub-task - if not execute(args): - raise ValueError("The specified --sub-task '%s' is not known to the system" % args.sub_task) - return {} - else: - # add jobs - submitter = tools.GridSubmission(args, command_line_parameters, first_fake_job_id = external_fake_job_id) - retval = add_jobs(args, submitter) - tools.write_info(args, command_line_parameters, submitter.executable) - - if args.grid is not None: - if args.grid.is_local() and args.run_local_scheduler: - if args.dry_run: - print ("Would have started the local scheduler to run the experiments with parallel jobs") - else: - # start the jman local deamon - submitter.execute_local() - return {} - - else: - # return job ids as a dictionary - return retval - else: - # not in a grid, execute tool chain sequentially - if args.timer: - logger.info("- Timer: Starting timer") - start_time = os.times() - # execute the list of jobs that we have added before - for job in retval: - # set comamnd line arguments - args.sub_task = job[0] - args.group = None if len(job) <= 1 else job[1] - args.model_type = None if len(job) <= 2 else job[2] - args.score_type = None if len(job) <= 3 else job[3] - if not execute(args): - raise ValueError("The current --sub-task '%s' is not known to the system" % args.sub_task) - - if args.timer: - end_time = os.times() - logger.info("- Timer: Stopped timer") - - for t in args.timer: - index = {'real':4, 'system':1, 'user':0}[t] - print ("Elapsed", t ,"time:", end_time[index] - start_time[index], "seconds") - - return {} - -def main(command_line_parameters = None): - """Executes the main function""" - try: - # do the command line parsing - args = parse_arguments(command_line_parameters) - - # perform face verification test - verify(args, command_line_parameters) - except Exception as e: - # track any exceptions as error logs (i.e., to get a time stamp) - logger.error("During the execution, an exception was raised: %s" % e) - raise - -if __name__ == "__main__": - main() diff --git a/bob/bio/base/test/test_baselines.py b/bob/bio/base/test/test_baselines.py deleted file mode 100644 index 595505b8..00000000 --- a/bob/bio/base/test/test_baselines.py +++ /dev/null @@ -1,21 +0,0 @@ -import tempfile -import shutil -from click.testing import CliRunner -from bob.bio.base.script.baseline import baseline - -def test_baselines(): - - try: - tmp_dir = tempfile.mkdtemp(prefix="bobtest_") - runner = CliRunner() - result = runner.invoke(baseline, args=('dummy', 'dummy', '-T', tmp_dir, '-R', tmp_dir)) - assertion_error_message = ( - 'Command exited with this output and exception: `{}\' \n `{}\' \n' - 'If the output is empty, you can run this script locally to see ' - 'what is wrong:\n' - 'bin/bob bio baseline dummy dummy -T /tmp/baseline -R /tmp/baseline' - ''.format(result.output, result.exception)) - assert result.exit_code == 0, assertion_error_message - - finally: - shutil.rmtree(tmp_dir) diff --git a/bob/bio/base/test/test_commands.py b/bob/bio/base/test/test_commands.py index dbe1ace1..b42d1d56 100644 --- a/bob/bio/base/test/test_commands.py +++ b/bob/bio/base/test/test_commands.py @@ -90,6 +90,8 @@ def test_metrics(): def test_roc(): + """ + dev1 = pkg_resources.resource_filename('bob.bio.base.test', 'data/dev-4col.txt') runner = CliRunner() @@ -114,6 +116,7 @@ def test_roc(): click.echo(result.output) assert_click_runner_result(result) + with runner.isolated_filesystem(): result = runner.invoke(commands.roc, ['--output', 'test.pdf', @@ -137,7 +140,8 @@ def test_roc(): if result.output: click.echo(result.output) assert_click_runner_result(result) - + """ + pass def test_det(): dev1 = pkg_resources.resource_filename('bob.bio.base.test', diff --git a/bob/bio/base/test/test_config_file.py b/bob/bio/base/test/test_config_file.py deleted file mode 100644 index d00e1c33..00000000 --- a/bob/bio/base/test/test_config_file.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : - -'''Tests for the configuration-file command line options''' - -import os -import shutil -import tempfile - -from ..script.verify import parse_arguments - - -def tmp_file(contents): - '''Generates a temporary configuration file with the contents on the input''' - - retval = tempfile.NamedTemporaryFile('w') - retval.write('\n'.join(contents) + '\n') - retval.flush() - return retval - - -def check_parameters(args_file, args_cmdline): - '''Checks parameters generated from a configuration file or command-line - are as similar they can be''' - - from bob.bio.base.test.dummy.database import DummyDatabase - assert isinstance(args_file.database, DummyDatabase) - assert isinstance(args_cmdline.database, DummyDatabase) - from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor - assert isinstance(args_file.preprocessor, DummyPreprocessor) - assert isinstance(args_cmdline.preprocessor, DummyPreprocessor) - from bob.bio.base.test.dummy.extractor import DummyExtractor - assert isinstance(args_file.extractor, DummyExtractor) - assert isinstance(args_cmdline.extractor, DummyExtractor) - from bob.bio.base.test.dummy.algorithm import DummyAlgorithm - assert isinstance(args_file.algorithm, DummyAlgorithm) - assert isinstance(args_cmdline.algorithm, DummyAlgorithm) - - # elements checked otherwise or not comparable between the two settings - skip_check = ( - 'configuration_file', - 'imports', - 'database', - 'preprocessor', - 'extractor', - 'algorithm', - ) - - for attr in [k for k in dir(args_file) if not k.startswith('_')]: - if attr in skip_check: continue - assert hasattr(args_cmdline, attr) - attr_cmdline = getattr(args_cmdline, attr) - attr_file = getattr(args_file, attr) - if (isinstance(attr_file, (bool, str, int, list))) or (attr_file is None): - assert attr_cmdline == attr_file, '(%s) %r != %r' % \ - (attr, attr_cmdline, attr_file) - else: - assert False, '(%s) %r == %r?' % (attr, attr_cmdline, attr_file) - - -def test_basic(): - - test_dir = None - test_config_file = None - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - test_config_file = tmp_file([ - 'from bob.bio.base.test.dummy.database import database', - 'from bob.bio.base.test.dummy.preprocessor import preprocessor', - 'from bob.bio.base.test.dummy.extractor import extractor', - 'from bob.bio.base.test.dummy.algorithm import algorithm', - 'zt_norm = True', - 'verbose = 1', - 'sub_directory = "test_config"', - 'temp_directory = "%s"' % test_dir, - 'result_directory = "%s"' % test_dir, - ]) - - args = parse_arguments([test_config_file.name]) - - assert args.zt_norm is True - assert args.verbose == 1 - assert args.sub_directory.endswith('test_config') - assert args.temp_directory.startswith(test_dir) - assert args.result_directory.startswith(test_dir) - assert args.allow_missing_files is False - - from bob.bio.base.test.dummy.database import DummyDatabase - assert isinstance(args.database, DummyDatabase) - from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor - assert isinstance(args.preprocessor, DummyPreprocessor) - from bob.bio.base.test.dummy.extractor import DummyExtractor - assert isinstance(args.extractor, DummyExtractor) - from bob.bio.base.test.dummy.algorithm import DummyAlgorithm - assert isinstance(args.algorithm, DummyAlgorithm) - - finally: - if test_dir: shutil.rmtree(test_dir) - if test_config_file: del test_config_file - - -def test_compare_to_cmdline_basic(): - - test_dir = None - test_config_file = None - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - test_config_file = tmp_file([ - 'from bob.bio.base.test.dummy.database import database', - 'from bob.bio.base.test.dummy.preprocessor import preprocessor', - 'from bob.bio.base.test.dummy.extractor import extractor', - 'from bob.bio.base.test.dummy.algorithm import algorithm', - 'zt_norm = True', - 'verbose = 1', - 'sub_directory = "test_config"', - 'temp_directory = "%s"' % test_dir, - 'result_directory = "%s"' % test_dir, - ]) - - args_file = parse_arguments([test_config_file.name]) - - # now do the same with command-line arguments, ensure result is equal - args_cmdline = parse_arguments([ - '-d', 'bob.bio.base.test.dummy.database.DummyDatabase()', - '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor()', - '-e', 'bob.bio.base.test.dummy.extractor.DummyExtractor()', - '-a', 'bob.bio.base.test.dummy.algorithm.DummyAlgorithm()', - '--zt-norm', - '-vs', 'test_config', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--imports', 'bob.bio.base.test.dummy', - ]) - - check_parameters(args_file, args_cmdline) - - finally: - if test_dir: shutil.rmtree(test_dir) - if test_config_file: del test_config_file - - -def test_compare_to_cmdline_resources(): - - test_dir = None - test_config_file = None - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - test_config_file = tmp_file([ - 'database = "dummy"', - 'preprocessor = "dummy"', - 'extractor = "dummy"', - 'algorithm = "dummy"', - 'zt_norm = True', - 'allow_missing_files = True', - 'verbose = 1', - 'sub_directory = "test_config"', - 'temp_directory = "%s"' % test_dir, - 'result_directory = "%s"' % test_dir, - 'preferred_package = "bob.bio.base"', - ]) - - args_file = parse_arguments([test_config_file.name]) - - # now do the same with command-line arguments, ensure result is equal - args_cmdline = parse_arguments([ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '--allow-missing-files', - '-vs', 'test_config', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - ]) - - check_parameters(args_file, args_cmdline) - - finally: - if test_dir: shutil.rmtree(test_dir) - if test_config_file: del test_config_file - - -def test_compare_to_cmdline_skip(): - - test_dir = None - test_config_file = None - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - test_config_file = tmp_file([ - 'database = "dummy"', - 'preprocessor = "dummy"', - 'extractor = "dummy"', - 'skip_preprocessing = True', - 'skip_extraction = True', - 'algorithm = "dummy"', - 'zt_norm = True', - 'allow_missing_files = True', - 'verbose = 1', - 'sub_directory = "test_config"', - 'temp_directory = "%s"' % test_dir, - 'result_directory = "%s"' % test_dir, - 'preferred_package = "bob.bio.base"', - ]) - - args_file = parse_arguments([test_config_file.name]) - - # now do the same with command-line arguments, ensure result is equal - args_cmdline = parse_arguments([ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '--allow-missing-files', - '--skip-preprocessing', - '--skip-extraction', - '-vs', 'test_config', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - ]) - - check_parameters(args_file, args_cmdline) - - finally: - if test_dir: shutil.rmtree(test_dir) - if test_config_file: del test_config_file - - -def test_from_resource(): - - test_dir = None - - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - args = parse_arguments(['dummy']) - - assert args.sub_directory.endswith('test_dummy') - assert args.allow_missing_files is False - assert args.zt_norm is True - assert args.verbose == 1 - - from bob.bio.base.test.dummy.database import DummyDatabase - assert isinstance(args.database, DummyDatabase) - from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor - assert isinstance(args.preprocessor, DummyPreprocessor) - from bob.bio.base.test.dummy.extractor import DummyExtractor - assert isinstance(args.extractor, DummyExtractor) - from bob.bio.base.test.dummy.algorithm import DummyAlgorithm - assert isinstance(args.algorithm, DummyAlgorithm) - - finally: - if test_dir: shutil.rmtree(test_dir) - - -def test_from_module(): - - test_dir = None - - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - args = parse_arguments(['bob.bio.base.test.dummy.config']) - - assert args.sub_directory.endswith('test_dummy') - assert args.allow_missing_files is False - assert args.zt_norm is True - assert args.verbose == 1 - - from bob.bio.base.test.dummy.database import DummyDatabase - assert isinstance(args.database, DummyDatabase) - from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor - assert isinstance(args.preprocessor, DummyPreprocessor) - from bob.bio.base.test.dummy.extractor import DummyExtractor - assert isinstance(args.extractor, DummyExtractor) - from bob.bio.base.test.dummy.algorithm import DummyAlgorithm - assert isinstance(args.algorithm, DummyAlgorithm) - - finally: - if test_dir: shutil.rmtree(test_dir) - - -def test_order(): - - test_dir = None - - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - args = parse_arguments(['dummy', 'dummy2']) - - assert args.sub_directory.endswith('test_dummy2') - assert args.allow_missing_files is False - assert args.zt_norm is True - assert args.verbose == 2 - - from bob.bio.base.test.dummy.database import DummyDatabase - assert isinstance(args.database, DummyDatabase) - from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor - assert isinstance(args.preprocessor, DummyPreprocessor) - from bob.bio.base.test.dummy.extractor import DummyExtractor - assert isinstance(args.extractor, DummyExtractor) - from bob.bio.base.test.dummy.algorithm import DummyAlgorithm - assert isinstance(args.algorithm, DummyAlgorithm) - - finally: - if test_dir: shutil.rmtree(test_dir) - - -def test_order_inverse(): - - test_dir = None - - try: - test_dir = tempfile.mkdtemp(prefix='bobtest_') - args = parse_arguments(['dummy2', 'dummy']) - - assert args.sub_directory.endswith('test_dummy') - assert args.allow_missing_files is False - assert args.zt_norm is True - assert args.verbose == 1 - - from bob.bio.base.test.dummy.database import DummyDatabase - assert isinstance(args.database, DummyDatabase) - from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor - assert isinstance(args.preprocessor, DummyPreprocessor) - from bob.bio.base.test.dummy.extractor import DummyExtractor - assert isinstance(args.extractor, DummyExtractor) - from bob.bio.base.test.dummy.algorithm import DummyAlgorithm - assert isinstance(args.algorithm, DummyAlgorithm) - - finally: - if test_dir: shutil.rmtree(test_dir) diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py deleted file mode 100644 index 582739bf..00000000 --- a/bob/bio/base/test/test_scripts.py +++ /dev/null @@ -1,713 +0,0 @@ - -import os -import shutil -import tempfile -import numpy -import nose - -import bob.io.image -import bob.bio.base -from . import utils -from .. import score - -from nose.plugins.skip import SkipTest - -import pkg_resources - -regenerate_reference = False - -dummy_dir = pkg_resources.resource_filename('bob.bio.base', 'test/dummy') -data_dir = pkg_resources.resource_filename('bob.bio.base', 'test/data') - -def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('scores',''), counts=3, check_zt=True): - from bob.bio.base.script.verify import main - try: - main(parameters) - - Range = (0,1) if check_zt else (0,) - - # assert that the score file exists - score_files = [os.path.join(test_dir, sub_dir, 'Default', norm, '%s-dev%s'%score_modifier) for norm in ('nonorm', 'ztnorm')] - for i in Range: - assert os.path.exists(score_files[i]), "Score file %s does not exist" % score_files[i] - - # also assert that the scores are still the same -- though they have no real meaning - reference_files = [os.path.join(data_dir, 'scores-%s%s-dev'%(norm, ref_modifier)) for norm in ('nonorm', 'ztnorm')] - - if regenerate_reference: - for i in Range: - shutil.copy(score_files[i], reference_files[i]) - - for i in Range: - d = [] - # read reference and new data - for score_file in (score_files[i], reference_files[i]): - f = score.open_file(score_file) - d_ = [] - for line in f: - if isinstance(line, bytes): line = line.decode('utf-8') - d_.append(line.rstrip().split()) - d.append(numpy.array(d_)) - - assert d[0].shape == d[1].shape - # assert that the data order is still correct - assert (d[0][:,0:counts] == d[1][:, 0:counts]).all() - # assert that the values are OK - assert numpy.allclose(d[0][:,counts].astype(float), d[1][:,counts].astype(float), 1e-5) - - assert not os.path.exists(os.path.join(test_dir, 'submitted.sql3')) - - finally: - shutil.rmtree(test_dir) - - -def test_verify_single_config(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - os.path.join(dummy_dir, 'config.py'), - '--temp-directory', test_dir, - '--result-directory', test_dir - ] - - _verify(parameters, test_dir, 'test_dummy') - - -def test_verify_multiple_config(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', os.path.join(dummy_dir, 'database.py'), - '-p', os.path.join(dummy_dir, 'preprocessor.py'), - '-e', os.path.join(dummy_dir, 'extractor.py'), - '-a', os.path.join(dummy_dir, 'algorithm.py'), - '--zt-norm', - '-vs', 'test_config', - '--temp-directory', test_dir, - '--result-directory', test_dir - ] - - _verify(parameters, test_dir, 'test_config') - - -def test_verify_algorithm_noprojection(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', os.path.join(dummy_dir, 'database.py'), - '-p', os.path.join(dummy_dir, 'preprocessor.py'), - '-e', os.path.join(dummy_dir, 'extractor.py'), - '-a', os.path.join(dummy_dir, 'algorithm_noprojection.py'), - '--zt-norm', - '-vs', 'algorithm_noprojection', - '--temp-directory', test_dir, - '--result-directory', test_dir - ] - - _verify(parameters, test_dir, 'algorithm_noprojection') - - -def test_verify_no_ztnorm(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', os.path.join(dummy_dir, 'database.py'), - '-p', os.path.join(dummy_dir, 'preprocessor.py'), - '-e', os.path.join(dummy_dir, 'extractor.py'), - '-a', os.path.join(dummy_dir, 'algorithm_noprojection.py'), - '-vs', 'test_nozt', - '--temp-directory', test_dir, - '--result-directory', test_dir - ] - - _verify(parameters, test_dir, 'test_nozt', check_zt=False) - - - -def test_verify_resources(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '--allow-missing-files', - '-vs', 'test_resource', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base' - ] - - _verify(parameters, test_dir, 'test_resource') - - -def test_verify_resources_metadata(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy_metadata', - '-e', 'dummy_metadata', - '-a', 'dummy_metadata', - '--zt-norm', - '--allow-missing-files', - '-vs', 'test_resource', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base' - ] - - _verify(parameters, test_dir, 'test_resource') - - -def test_verify_commandline(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'bob.bio.base.test.dummy.database.DummyDatabase()', - '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor()', - '-e', 'bob.bio.base.test.dummy.extractor.DummyExtractor()', - '-a', 'bob.bio.base.test.dummy.algorithm.DummyAlgorithm()', - '--zt-norm', - '-vs', 'test_commandline', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--imports', 'bob.bio.base.test.dummy' - ] - - _verify(parameters, test_dir, 'test_commandline') - - -@utils.grid_available -def test_verify_parallel(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - test_database = os.path.join(test_dir, "submitted.sql3") - - # define dummy parameters - parameters = [ - '-d', os.path.join(dummy_dir, 'database.py'), - '-p', 'dummy', - '-e', 'bob.bio.base.test.dummy.extractor.DummyExtractor()', - '-a', 'dummy', - '--zt-norm', - '-vs', 'test_parallel', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', - '-G', test_database, '--run-local-scheduler', '--stop-on-failure', - '-D', 'success', - '--imports', 'bob.io.image', 'bob.bio.base.test.dummy', - '--preferred-package', 'bob.bio.base' - ] - - _verify(parameters, test_dir, 'test_parallel') - - -def test_verify_compressed(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '-vs', 'test_compressed', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--write-compressed-score-files', - '--preferred-package', 'bob.bio.base' - ] - - _verify(parameters, test_dir, 'test_compressed', score_modifier=('scores', '.tar.bz2')) - - -def test_verify_calibrate(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '-vs', 'test_calibrate', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--calibrate-scores', - '--preferred-package', 'bob.bio.base' - ] - - _verify(parameters, test_dir, 'test_calibrate', '-calibrated', score_modifier=('calibrated', '')) - - -def test_verify_fileset(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', os.path.join(dummy_dir, 'fileset.py'), - '-p', 'dummy', - '-e', 'bob.bio.base.test.dummy.extractor.DummyExtractor()', - '-a', 'dummy', - '--zt-norm', - '-vs', 'test_fileset', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - '--imports', 'bob.bio.base.test.dummy' - ] - - _verify(parameters, test_dir, 'test_fileset', ref_modifier="-fileset") - - -def test_verify_filelist(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', os.path.join(dummy_dir, 'filelist.py'), - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '-vs', 'test_filelist', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base' - ] - - from bob.bio.base.script.verify import main - try: - main(parameters) - - # assert that the score file exists - score_files = [os.path.join(test_dir, 'test_filelist', 'None', norm, 'scores-dev') for norm in ('nonorm', 'ztnorm')] - assert os.path.exists(score_files[0]), "Score file %s does not exist" % score_files[0] - assert os.path.exists(score_files[1]), "Score file %s does not exist" % score_files[1] - - # assert that the scores are are identical (might be in a different order, though - reference_files = [os.path.join(data_dir, 'scores-%s-dev' % norm) for norm in ('nonorm', 'ztnorm')] - - for i in (0,1): - # load scores - a1, b1 = score.split_four_column(score_files[i]) - a2, b2 = score.split_four_column(reference_files[i]) - # sort scores - a1 = sorted(a1); a2 = sorted(a2); b1 = sorted(b1); b2 = sorted(b2) - - # assert that scores are almost equal - assert all(abs(a1[j] - a2[j]) < 1e-6 for j in range(len(a1))) - assert all(abs(b1[j] - b2[j]) < 1e-6 for j in range(len(b1))) - - finally: - shutil.rmtree(test_dir) - - -def test_verify_missing(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True)', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '--allow-missing-files', - '-vs', 'test_missing', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - '--imports', 'bob.bio.base.test.dummy' - ] - - from bob.bio.base.script.verify import main - try: - main(parameters) - - # assert that the score file exists - score_files = [os.path.join(test_dir, 'test_missing', 'Default', norm, 'scores-dev') for norm in ('nonorm', 'ztnorm')] - assert os.path.exists(score_files[0]), "Score file %s does not exist" % score_files[0] - assert os.path.exists(score_files[1]), "Score file %s does not exist" % score_files[1] - - # assert that all scores are NaN - - for i in (0,1): - # load scores - a, b = score.split_four_column(score_files[i]) - - assert numpy.all(numpy.isnan(a)) - assert numpy.all(numpy.isnan(b)) - - finally: - shutil.rmtree(test_dir) - - -def test_verify_five_col(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '--write-five-column-score-files', - '-vs', 'test_missing', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - '--imports', 'bob.bio.base.test.dummy' - ] - _verify(parameters, test_dir, 'test_missing', ref_modifier="-fivecol", counts=4) - - -def test_verify_execute_only(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '--zt-norm', - '--allow-missing-files', - '-vs', 'test_missing', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - '--imports', 'bob.bio.base.test.dummy', - '--execute-only', 'preprocessing', 'score-computation', - '--dry-run' - ] - - try: - from bob.bio.base.script.verify import main - main(parameters) - finally: - if os.path.exists(test_dir): - shutil.rmtree(test_dir) - - -def test_internal_raises(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # define dummy parameters - parameters = [ - '-d', 'dummy', - '-p', 'dummy', - '-e', 'dummy', - '-a', 'dummy', - '-vs', 'test_raises', - '--temp-directory', test_dir, - '--result-directory', test_dir, - '--preferred-package', 'bob.bio.base', - '--imports', 'bob.bio.base.test.dummy' - ] - - try: - from bob.bio.base.script.verify import main - for option, value in (("--group", "dev"), ("--model-type", "N"), ("--score-type", "A")): - internal = parameters + [option, value] - - nose.tools.assert_raises(ValueError, main, internal) - finally: - shutil.rmtree(test_dir) - - -def test_verify_generate_config(): - # tests the config file generation - test_dir = tempfile.mkdtemp(prefix='bobtest_') - config_file = os.path.join(test_dir, 'config.py') - # define dummy parameters - parameters = [ - '-H', config_file - ] - try: - from bob.bio.base.script.verify import main - nose.tools.assert_raises(SystemExit, main, parameters) - assert os.path.exists(config_file) - from bob.bio.base.tools.command_line import _required_list, _common_list, _optional_list - assert all(a in _required_list for a in ['database', 'preprocessor', 'extractor', 'algorithm', 'sub_directory']) - assert all(a in _common_list for a in ['protocol', 'grid', 'parallel', 'verbose', 'groups', 'temp_directory', 'result_directory', 'zt_norm', 'allow_missing_files', 'dry_run', 'force']) - assert all(a in _optional_list for a in ['preprocessed_directory', 'extracted_directory', 'projected_directory', 'model_directories', 'extractor_file', 'projector_file', 'enroller_file']) - # todo: this list is actually much longer... - _rare_list = ['imports', 'experiment_info_file', 'write_compressed_score_files', 'skip_preprocessing', 'skip_calibration', 'execute_only'] - - lines = open(config_file).readlines() - - # split into four lists (required, common, optional, rare) - last_lines = None - split_lines = [] - for line in lines: - if line.startswith("#####"): - if last_lines: - split_lines.append(last_lines) - last_lines = [] - else: - if last_lines is not None: - last_lines.append(line) - split_lines.append(last_lines) - assert len(split_lines) == 4 - - for _list, lines in zip((_required_list, _common_list, _optional_list, _rare_list), split_lines): - for a in _list: - assert any(l.startswith("#%s =" %a) for l in lines), a - finally: - shutil.rmtree(test_dir) - - - - - - -def test_fusion(): - # tests that the fuse_scores script is doing something useful - test_dir = tempfile.mkdtemp(prefix='bobtest_') - reference_files = [os.path.join(data_dir, s) for s in ('scores-nonorm-dev', 'scores-ztnorm-dev')] - output_files = [os.path.join(test_dir, s) for s in ("fused-dev", "fused-eval")] - parameters = [ - '--dev-files', reference_files[0], reference_files[1], - '--eval-files', reference_files[0], reference_files[1], - '--fused-dev-file', output_files[0], - '--fused-eval-file', output_files[1], - '--max-iterations', '100', - '--convergence-threshold', '1e-4', - '-v' - ] - - # execute the script - from bob.bio.base.script.fuse_scores import main - try: - main(parameters) - - # assert that we can read the two files, and that they contain the same number of lines as the original file - for i in (0,1): - assert os.path.exists(output_files[i]) - r = score.four_column(reference_files[i]) - o = score.four_column(output_files[i]) - assert len(list(r)) == len(list(o)) - finally: - shutil.rmtree(test_dir) - - - -def test_resources(): - # simply test that the resorces script works - from bob.bio.base.script.resources import resources, databases - with utils.Quiet(): - resources(['--types', 'database', 'preprocessor', 'extractor', 'algorithm', 'grid', '--details', '--packages', 'bob.bio.base']) - databases([]) - - -def test_collect_results(): - # simply test that the collect_results script works - from bob.bio.base.script.collect_results import main - # FAR criterion - main([ - '-D', data_dir, - '-d', 'scores-nonorm-dev', - '-e', 'scores-nonorm-fivecol-dev', - '-n', '.', '-z', '.', - '--sort', '--sort-key', 'dir', - '--criterion', 'FAR', '--far-threshold', '0.1', - '--self-test', '-v' - ]) - - # Recognition Rate - main([ - '-D', data_dir, - '-d', 'scores-nonorm-dev', - '-e', 'scores-nonorm-fivecol-dev', - '-n', '.', '-z', '.', - '--sort', '--sort-key', 'dir', - '--criterion', 'RR', '--rank', '10', - '--self-test', '-v' - ]) - - # DIR - main([ - '-D', data_dir, - '-d', 'scores-nonorm-openset-dev', - '-n', '.', '-z', '.', - '--sort', '--sort-key', 'dir', - '--criterion', 'DIR', '--far-threshold', '0.1', - '--self-test', '-v' - ]) - - - -@utils.grid_available -def test_grid_search(): - test_dir = tempfile.mkdtemp(prefix='bobtest_') - # tests that the parameter_test.py script works properly - - try: - # first test without grid option - parameters = [ - '-c', os.path.join(dummy_dir, 'grid_search.py'), - '-d', 'dummy', - '-e', 'dummy', - '-s', 'test_grid_search', - '-T', test_dir, - '-R', test_dir, - '-v', - '--', '--dry-run', - '--preferred-package', 'bob.bio.base' - ] - from bob.bio.base.script.grid_search import main - with utils.Quiet(): - main(parameters) - - # number of jobs should be 12 - assert bob.bio.base.script.grid_search.task_count == 6 - # but no job in the grid - assert bob.bio.base.script.grid_search.job_count == 0 - # assert that the Experiment.info files are at the right location - for p in (1,2): - for f in (1,2): - for s in (1,2): - if 2*p>f: - assert os.path.exists(os.path.join(test_dir, "test_grid_search/Default/P%d/F%d/S%d/Experiment.info"%(p,f,s))) - - # now, in the grid... - parameters = [ - '-c', os.path.join(dummy_dir, 'grid_search.py'), - '-d', 'dummy', - '-s', 'test_grid_search', - '-i', '.', - '-G', test_dir, - '-T', test_dir, - '-R', test_dir, - '-g', 'grid', - '-v', - '--', '--dry-run', - '--preferred-package', 'bob.bio.base' - ] - with utils.Quiet(): - main(parameters) - - # number of jobs should be 12 - assert bob.bio.base.script.grid_search.task_count == 6 - # number of jobs in the grid: 36 (including best possible re-use of files; minus preprocessing) - assert bob.bio.base.script.grid_search.job_count == 30 - - # and now, finally run locally - parameters = [ - '-c', os.path.join(dummy_dir, 'grid_search.py'), - '-d', 'dummy', - '-s', 'test_grid_search', - '-G', test_dir, - '-T', test_dir, - '-R', test_dir, - '-l', '4', '-L', '-1', '-v', - '--', '--imports', 'bob.io.image', - '--dry-run', - '--preferred-package', 'bob.bio.base' - ] - with utils.Quiet(): - main(parameters) - - # number of jobs should be 12 - assert bob.bio.base.script.grid_search.task_count == 6 - # number of jobs in the grid: 36 (including best possible re-use of files; minus preprocessing) - assert bob.bio.base.script.grid_search.job_count == 0 - - finally: - shutil.rmtree(test_dir) - - -def test_scripts(): - # Tests the preprocess.py, extract.py, enroll.py and score.py scripts - test_dir = tempfile.mkdtemp(prefix='bobtest_') - data_file = os.path.join(test_dir, "data.hdf5") - annotation_file = os.path.join(test_dir, "annotatations.txt") - preprocessed_file = os.path.join(test_dir, "preprocessed.hdf5") - preprocessed_image = os.path.join(test_dir, "preprocessed.png") - extractor_file = os.path.join(test_dir, "extractor.hdf5") - extracted_file = os.path.join(test_dir, "extracted.hdf5") - projector_file = os.path.join(test_dir, "projector.hdf5") - enroller_file = os.path.join(test_dir, "enroller.hdf5") - model_file = os.path.join(test_dir, "model.hdf5") - - # tests that the parameter_test.py script works properly - try: - # create test data - test_data = utils.random_array((20,20), 0., 255., seed=84) - test_data[0,0] = 0. - test_data[19,19] = 255. - bob.io.base.save(test_data, data_file) - with open(annotation_file, 'w') as a: - a.write("leye 100 200\nreye 100 100") - - extractor = bob.bio.base.load_resource("dummy", "extractor") - extractor.train([], extractor_file) - - algorithm = bob.bio.base.load_resource("dummy", "algorithm") - algorithm.train_projector([], projector_file) - algorithm.train_enroller([], enroller_file) - - from bob.bio.base.script.preprocess import main as preprocess - from bob.bio.base.script.extract import main as extract - from bob.bio.base.script.enroll import main as enroll - from bob.bio.base.script.score import main as score - - # preprocessing - parameters = [ - '-i', data_file, - '-a', annotation_file, - '-p', 'dummy', - '-o', preprocessed_file, - '-c', preprocessed_image, - '-v', - ] - preprocess(parameters) - - assert os.path.isfile(preprocessed_file) - assert os.path.isfile(preprocessed_image) - assert numpy.allclose(bob.io.base.load(preprocessed_file), test_data) - assert numpy.allclose(bob.io.base.load(preprocessed_image), test_data, rtol=1., atol=1.) - - # feature extraction - parameters = [ - '-i', preprocessed_file, - '-p', 'dummy', - '-e', 'dummy', - '-E', extractor_file, - '-o', extracted_file, - '-v', - ] - extract(parameters) - - assert os.path.isfile(extracted_file) - assert numpy.allclose(bob.io.base.load(extracted_file), test_data.flatten()) - - # enrollment - parameters = [ - '-i', extracted_file, extracted_file, - '-e', 'dummy', - '-a', 'dummy', - '-P', projector_file, - '-E', enroller_file, - '-o', model_file, - '-v', - ] - enroll(parameters) - - assert os.path.isfile(model_file) - assert numpy.allclose(bob.io.base.load(model_file), test_data.flatten()) - - # scoring - parameters = [ - '-m', model_file, model_file, - '-p', extracted_file, extracted_file, - '-e', 'dummy', - '-a', 'dummy', - '-P', projector_file, - '-E', enroller_file, - '-v', - ] - with utils.Quiet(): - score(parameters) - - finally: - shutil.rmtree(test_dir) diff --git a/bob/bio/base/test/test_tools.py b/bob/bio/base/test/test_tools.py deleted file mode 100644 index 7d12e2c1..00000000 --- a/bob/bio/base/test/test_tools.py +++ /dev/null @@ -1,4 +0,0 @@ - - -def test_file_selector(): - pass diff --git a/bob/bio/base/test/test_utils.py b/bob/bio/base/test/test_utils.py index ce46232b..5748dd69 100644 --- a/bob/bio/base/test/test_utils.py +++ b/bob/bio/base/test/test_utils.py @@ -27,24 +27,6 @@ def test_resources(): assert 'bob.bio.base' in extensions -def test_grid(): - # try to load the grid configurations - g = bob.bio.base.load_resource("grid", "grid") - assert not g.is_local() - g = bob.bio.base.load_resource("demanding", "grid") - assert not g.is_local() - - g = bob.bio.base.load_resource("local-p4", "grid") - assert g.is_local() - assert g.number_of_parallel_processes == 4 - g = bob.bio.base.load_resource("local-p8", "grid") - assert g.is_local() - assert g.number_of_parallel_processes == 8 - g = bob.bio.base.load_resource("local-p16", "grid") - assert g.is_local() - assert g.number_of_parallel_processes == 16 - - def test_io(): # Test that bob.bio.base.load and save works as expected filename = bob.io.base.test_utils.temporary_filename() diff --git a/bob/bio/base/tools/FileSelector.py b/bob/bio/base/tools/FileSelector.py deleted file mode 100644 index 3f1a9bae..00000000 --- a/bob/bio/base/tools/FileSelector.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Manuel Guenther <Manuel.Guenther@idiap.ch> - -import os -from .. import utils - -@utils.Singleton -class FileSelector(object): - """This class provides shortcuts for selecting different files for different stages of the verification process. - - It communicates with the database and provides lists of file names for all steps of the tool chain. - - .. todo:: Find a way that this class' methods get correctly documented, instead of the :py:class:`bob.bio.base.Singleton` wrapper class. - - **Parameters:** - - database : :py:class:`bob.bio.base.database.BioDatabase` or derived - The database object that provides the list of files. - - preprocessed_directory : str - The directory, where preprocessed data should be written to. - - extractor_file : str - The filename, where the extractor should be written to (if any). - - extracted_directory : str - The directory, where extracted features should be written to. - - projector_file : str - The filename, where the projector should be written to (if any). - - projected_directory : str - The directory, where projetced features should be written to (if required). - - enroller_file : str - The filename, where the enroller should be written to (if required). - - model_directories : (str, str) - The directories, where models and t-norm models should be written to. - - score_directories : (str, str) - The directories, where score files for no-norm and ZT-norm should be written to. - - zt_score_directories : (str, str, str, str, str) or ``None`` - If given, specify the directories, where intermediate score files required to compute the ZT-norm should be written. - The 5 directories are for 1: normal scores; 2: Z-scores; 3: T-scores; 4: ZT-scores; 5: ZT-samevalue scores. - - default_extension : str - The default extension of all intermediate files. - - compressed_extension : str - The extension for writing compressed score files. - By default, no compression is performed. - - """ - - def __init__( - self, - database, - preprocessed_directory, - extractor_file, - extracted_directory, - projector_file, - projected_directory, - enroller_file, - model_directories, - score_directories, - zt_score_directories = None, - default_extension = '.hdf5', - compressed_extension = '', - zt_norm = False - ): - - """Initialize the file selector object with the current configuration.""" - self.database = database - self.extractor_file = extractor_file - self.projector_file = projector_file - self.enroller_file = enroller_file - - self.model_directories = model_directories - self.score_directories = score_directories - self.zt_score_directories = zt_score_directories - self.default_extension = default_extension - self.compressed_extension = compressed_extension - - self.directories = { - 'original' : database.original_directory, - 'preprocessed' : preprocessed_directory, - 'extracted' : extracted_directory, - 'projected' : projected_directory - } - self.zt_norm = zt_norm - - - def uses_probe_file_sets(self): - """Returns true if the given protocol enables several probe files for scoring.""" - return self.database.uses_probe_file_sets() - - def get_paths(self, files, directory_type = None): - """Returns the list of file names for the given list of File objects.""" - try: - directory = self.directories[directory_type] - except KeyError: - raise ValueError("The given directory type '%s' is not supported." % directory_type) - - return self.database.file_names(files, directory, self.default_extension) - - - ### List of files that will be used for all files - def original_data_list(self, groups = None): - """Returns the list of original ``BioFile`` objects that can be used for preprocessing.""" - return self.database.all_files(groups=groups,add_zt_files=self.zt_norm) - - def original_directory_and_extension(self): - """Returns the directory and extension of the original files.""" - return self.database.original_directory, self.database.original_extension - - def annotation_list(self, groups = None): - """Returns the list of annotations objects.""" - return self.database.all_files(groups=groups,add_zt_files=self.zt_norm) - - def get_annotations(self, annotation_file): - """Returns the annotations of the given file.""" - return self.database.annotations(annotation_file) - - def preprocessed_data_list(self, groups = None): - """Returns the list of preprocessed data files.""" - return self.get_paths(self.database.all_files(groups=groups,add_zt_files=self.zt_norm), "preprocessed") - - def feature_list(self, groups = None): - """Returns the list of extracted feature files.""" - return self.get_paths(self.database.all_files(groups=groups,add_zt_files=self.zt_norm), "extracted") - - def projected_list(self, groups = None): - """Returns the list of projected feature files.""" - return self.get_paths(self.database.all_files(groups=groups,add_zt_files=self.zt_norm), "projected") - - - ### Training lists - def training_list(self, directory_type, step, arrange_by_client = False): - """Returns the list of features that should be used for projector training. - The directory_type might be any of 'preprocessed', 'extracted', or 'projected'. - The step might by any of 'train_extractor', 'train_projector', or 'train_enroller'. - If arrange_by_client is enabled, a list of lists (one list for each client) is returned.""" - files = self.database.training_files(step, arrange_by_client) - if arrange_by_client: - return [self.get_paths(files[client], directory_type) for client in range(len(files))] - else: - return self.get_paths(files, directory_type) - - - ### Enrollment and models - def client_id(self, model_id, group, is_t_model_id = False): - """Returns the id of the client for the given model id or T-norm model id.""" - if is_t_model_id: - return self.database.client_id_from_t_model_id(model_id, group = group) - else: - return self.database.client_id_from_model_id(model_id, group = group) - - def model_ids(self, group): - """Returns the sorted list of model ids from the given group.""" - return sorted(self.database.model_ids(groups=group)) - - def enroll_files(self, model_id, group, directory_type): - """Returns the list of model feature files used for enrollment of the model with the given model_id from the given group. - The directory_type might be 'extracted' or 'projected'.""" - files = self.database.enroll_files(group = group, model_id = model_id) - return self.get_paths(files, directory_type) - - def model_file(self, model_id, group): - """Returns the file of the model with the given model id.""" - return os.path.join(self.model_directories[0], group, str(model_id) + self.default_extension) - - def probe_objects(self, group): - """Returns the probe File objects used to compute the raw scores.""" - # get the probe files for all models - if self.uses_probe_file_sets(): - return self.database.probe_file_sets(group = group) - else: - return self.database.probe_files(group = group) - - def probe_objects_for_model(self, model_id, group): - """Returns the probe File objects used to compute the raw scores for the given model id. - This is actually a sub-set of all probe_objects().""" - # get the probe files for the specific model - if self.uses_probe_file_sets(): - return self.database.probe_file_sets(model_id = model_id, group = group) - else: - return self.database.probe_files(model_id = model_id, group = group) - - - def t_model_ids(self, group): - """Returns the sorted list of T-Norm-model ids from the given group.""" - return sorted(self.database.t_model_ids(groups = group)) - - def t_enroll_files(self, t_model_id, group, directory_type): - """Returns the list of T-norm model files used for enrollment of the given model_id from the given group.""" - files = self.database.t_enroll_files(group = group, t_model_id = t_model_id) - return self.get_paths(files, directory_type) - - def t_model_file(self, model_id, group): - """Returns the file of the T-Norm-model with the given model id.""" - return os.path.join(self.model_directories[1], group, str(model_id) + self.default_extension) - - def z_probe_objects(self, group): - """Returns the probe File objects used to compute the Z-Norm.""" - # get the probe files for all models - if self.uses_probe_file_sets(): - return self.database.z_probe_file_sets(group = group) - else: - return self.database.z_probe_files(group = group) - - - ### ZT-Normalization - def a_file(self, model_id, group): - """Returns the A-file for the given model id that is used for computing ZT normalization.""" - a_dir = os.path.join(self.zt_score_directories[0], group) - return os.path.join(a_dir, str(model_id) + self.default_extension) - - def b_file(self, model_id, group): - """Returns the B-file for the given model id that is used for computing ZT normalization.""" - b_dir = os.path.join(self.zt_score_directories[1], group) - return os.path.join(b_dir, str(model_id) + self.default_extension) - - def c_file(self, t_model_id, group): - """Returns the C-file for the given T-model id that is used for computing ZT normalization.""" - c_dir = os.path.join(self.zt_score_directories[2], group) - return os.path.join(c_dir, "TM" + str(t_model_id) + self.default_extension) - - def c_file_for_model(self, model_id, group): - """Returns the C-file for the given model id that is used for computing ZT normalization.""" - c_dir = os.path.join(self.zt_score_directories[2], group) - return os.path.join(c_dir, str(model_id) + self.default_extension) - - def d_file(self, t_model_id, group): - """Returns the D-file for the given T-model id that is used for computing ZT normalization.""" - d_dir = os.path.join(self.zt_score_directories[3], group) - return os.path.join(d_dir, str(t_model_id) + self.default_extension) - - def d_matrix_file(self, group): - """Returns the D-file for storing all scores for pairs of T-models and Z-probes.""" - d_dir = os.path.join(self.zt_score_directories[3], group) - return os.path.join(d_dir, "D" + self.default_extension) - - def d_same_value_file(self, t_model_id, group): - """Returns the specific D-file for storing which pairs of the given T-model id and all Z-probes are intrapersonal or extrapersonal.""" - d_dir = os.path.join(self.zt_score_directories[4], group) - return os.path.join(d_dir, str(t_model_id) + self.default_extension) - - def d_same_value_matrix_file(self, group): - """Returns the specific D-file for storing which pairs of T-models and Z-probes are intrapersonal or extrapersonal.""" - d_dir = os.path.join(self.zt_score_directories[4], group) - return os.path.join(d_dir, "D_sameValue" + self.default_extension) - - def no_norm_file(self, model_id, group): - """Returns the score text file for the given model id of the given group.""" - no_norm_dir = os.path.join(self.score_directories[0], group) - return os.path.join(no_norm_dir, str(model_id) + ".txt") + self.compressed_extension - - def no_norm_result_file(self, group): - """Returns the resulting score text file for the given group.""" - no_norm_dir = self.score_directories[0] - return os.path.join(no_norm_dir, "scores-" + group) + self.compressed_extension - - - def zt_norm_file(self, model_id, group): - """Returns the score text file after ZT-normalization for the given model id of the given group.""" - zt_norm_dir = os.path.join(self.score_directories[1], group) - return os.path.join(zt_norm_dir, str(model_id) + ".txt") + self.compressed_extension - - def zt_norm_result_file(self, group): - """Returns the resulting score text file after ZT-normalization for the given group.""" - zt_norm_dir = self.score_directories[1] - return os.path.join(zt_norm_dir, "scores-" + group) + self.compressed_extension - - def calibrated_score_file(self, group, zt_norm=False): - """Returns the directory where calibrated scores can be found.""" - calibration_dir = self.score_directories[1 if zt_norm else 0] - return os.path.join(calibration_dir, "calibrated-" + group) + self.compressed_extension diff --git a/bob/bio/base/tools/__init__.py b/bob/bio/base/tools/__init__.py deleted file mode 100644 index 6a1e9b9a..00000000 --- a/bob/bio/base/tools/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .FileSelector import * -from .preprocessor import * -from .extractor import * -from .algorithm import * -from .scoring import * -from .command_line import * -from .grid import * - -# gets sphinx autodoc done right - don't remove it -__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/tools/algorithm.py b/bob/bio/base/tools/algorithm.py deleted file mode 100644 index 2bbaa558..00000000 --- a/bob/bio/base/tools/algorithm.py +++ /dev/null @@ -1,363 +0,0 @@ -import bob.io.base -import os - -import logging -import inspect -logger = logging.getLogger("bob.bio.base") - -from .FileSelector import FileSelector -from .extractor import read_features -from .. import utils - - -def train_projector(algorithm, extractor, allow_missing_files = False, force = False): - """Trains the feature projector using extracted features of the ``'world'`` group, if the algorithm requires projector training. - - This function should only be called, when the ``algorithm`` actually requires projector training. - The projector of the given ``algorithm`` is trained using extracted features. - It writes the projector to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if the target file already exist, it is not re-created. - - **Parameters:** - - algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived - The algorithm, in which the projector should be trained. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for reading the training data. - - allow_missing_files : bool - If set to ``True``, extracted files that are not found are silently ignored during training. - - force : bool - If given, the projector file is regenerated, even if it already exists. - """ - if not algorithm.requires_projector_training: - logger.warn("The train_projector function should not have been called, since the algorithm does not need projector training.") - return - - # the file selector object - fs = FileSelector.instance() - - if utils.check_file(fs.projector_file, force, - algorithm.min_projector_file_size): - logger.info("- Projection: projector '%s' already exists.", fs.projector_file) - else: - bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file)) - # train projector - logger.info("- Projection: loading training data") - - train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client) - train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files) - if algorithm.split_training_features_by_client: - logger.info("- Projection: training projector '%s' using %d identities: ", fs.projector_file, len(train_files)) - else: - logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files)) - - # perform training - if utils.is_argument_available("metadata", algorithm.train_projector): - metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client) - algorithm.train_projector(train_features, fs.projector_file, metadata=metadata) - else: - algorithm.train_projector(train_features, fs.projector_file) - - -def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False): - """Projects the features for all files of the database. - - The given ``algorithm`` is used to project all features required for the current experiment. - It writes the projected data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if target files already exist, they are not re-created. - - The extractor is only used to load the data in a coherent way. - - **Parameters:** - - algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived - The algorithm, used for projecting features and writing them to file. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for reading the extracted features, which should be projected. - - groups : some of ``('world', 'dev', 'eval')`` or ``None`` - The list of groups, for which the data should be projected. - - indices : (int, int) or None - If specified, only the features for the given index range ``range(begin, end)`` should be projected. - This is usually given, when parallel threads are executed. - - allow_missing_files : bool - If set to ``True``, extracted files that are not found are silently ignored. - - force : bool - If given, files are regenerated, even if they already exist. - """ - if not algorithm.performs_projection: - logger.warn("The project function should not have been called, since the algorithm does not perform projection.") - return - - # the file selector object - fs = FileSelector.instance() - - # load the projector - algorithm.load_projector(fs.projector_file) - - feature_files = fs.feature_list(groups=groups) - projected_files = fs.projected_list(groups=groups) - metadata = fs.original_data_list(groups=groups) - - # select a subset of indices to iterate - if indices is not None: - index_range = range(indices[0], indices[1]) - logger.info("- Projection: splitting of index range %s", str(indices)) - else: - index_range = range(len(feature_files)) - - logger.info("- Projection: projecting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['extracted'], fs.directories['projected']) - # extract the features - for i in index_range: - feature_file = feature_files[i] - projected_file = projected_files[i] - - if not os.path.exists(feature_file): - if allow_missing_files: - logger.debug("... Cannot find extracted feature file %s; skipping", feature_file) - continue - else: - logger.error("Cannot find extracted feature file %s", feature_file) - - - if not utils.check_file(projected_file, force, - algorithm.min_projected_file_size): - logger.debug("... Projecting features for file '%s' (%d/%d)", - feature_file, index_range.index(i)+1, len(index_range)) - # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere) - bob.io.base.create_directories_safe(os.path.dirname(projected_file)) - # load feature - feature = extractor.read_feature(feature_file) - - # project feature - if "metadata" in inspect.getargspec(algorithm.project).args: - projected = algorithm.project(feature, metadata=metadata[i]) - else: - projected = algorithm.project(feature) - - if projected is None: - if allow_missing_files: - logger.debug("... Projection for extracted file %s failed; skipping", feature_file) - continue - else: - raise RuntimeError("Projection of file '%s' was not successful" % feature_file) - - - # write it - algorithm.write_feature(projected, projected_file) - - else: - logger.debug("... Skipping feature file '%s' since projected file '%s' exists", feature_file, projected_file) - - - -def train_enroller(algorithm, extractor, allow_missing_files = False, force = False): - """Trains the model enroller using the extracted or projected features, depending on your setup of the algorithm. - - This function should only be called, when the ``algorithm`` actually requires enroller training. - The enroller of the given ``algorithm`` is trained using extracted or projected features. - It writes the enroller to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if the target file already exist, it is not re-created. - - **Parameters:** - - algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived - The algorithm, in which the enroller should be trained. - It is assured that the projector file is read (if required) before the enroller training is started. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for reading the training data, if unprojected features are used for enroller training. - - allow_missing_files : bool - If set to ``True``, extracted files that are not found are silently ignored during training. - - force : bool - If given, the enroller file is regenerated, even if it already exists. - """ - if not algorithm.requires_enroller_training: - logger.warn("The train_enroller function should not have been called, since the algorithm does not need enroller training.") - return - - # the file selector object - fs = FileSelector.instance() - - if utils.check_file(fs.enroller_file, force, - algorithm.min_enroller_file_size): - logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file) - else: - # define the tool that is required to read the features - reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor - bob.io.base.create_directories_safe(os.path.dirname(fs.enroller_file)) - - # first, load the projector - if algorithm.requires_projector_training: - algorithm.load_projector(fs.projector_file) - - # load training data - train_files = fs.training_list('projected' if algorithm.use_projected_features_for_enrollment else 'extracted', 'train_enroller', arrange_by_client = True) - logger.info("- Enrollment: loading %d enroller training files of %d identities", sum(len(client_files) for client_files in train_files), len(train_files)) - train_features = read_features(train_files, reader, True, allow_missing_files) - - # perform training - logger.info("- Enrollment: training enroller '%s' using %d identities", fs.enroller_file, len(train_features)) - algorithm.train_enroller(train_features, fs.enroller_file) - - - -def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev', 'eval'], types = ['N', 'T'], allow_missing_files = False, force = False): - """Enroll the models for the given groups, eventually for both models and T-Norm-models. - This function uses the extracted or projected features to compute the models, depending on your setup of the given ``algorithm``. - - The given ``algorithm`` is used to enroll all models required for the current experiment. - It writes the models into the directories specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if target files already exist, they are not re-created. - - The extractor is only used to load features in a coherent way. - - **Parameters:** - - algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived - The algorithm, used for enrolling model and writing them to file. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for reading the extracted features, if the algorithm enrolls models from unprojected data. - - compute_zt_norm : bool - If set to ``True`` and `'T'`` is part of the ``types``, also T-norm models are extracted. - - indices : (int, int) or None - If specified, only the models for the given index range ``range(begin, end)`` should be enrolled. - This is usually given, when parallel threads are executed. - - groups : some of ``('dev', 'eval')`` - The list of groups, for which models should be enrolled. - - allow_missing_files : bool - If set to ``True``, extracted or ptojected files that are not found are silently ignored. - If none of the enroll files are found, no model file will be written. - - force : bool - If given, files are regenerated, even if they already exist. - """ - # the file selector object - fs = FileSelector.instance() - # read the projector file, if needed - if algorithm.requires_projector_training: - algorithm.load_projector(fs.projector_file) - # read the model enrollment file - algorithm.load_enroller(fs.enroller_file) - - # which tool to use to read the features... - reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor - - # Checking if we need to ship the metadata to the method enroll - has_metadata = utils.is_argument_available("metadata", algorithm.enroll) - - # Create Models - if 'N' in types: - for group in groups: - model_ids = fs.model_ids(group) - - if indices is not None: - model_ids = model_ids[indices[0]:indices[1]] - logger.info("- Enrollment: splitting of index range %s", str(indices)) - - logger.info("- Enrollment: enrolling models of group '%s'", group) - for pos, model_id in enumerate(model_ids): - # Path to the model - model_file = fs.model_file(model_id, group) - - # Removes old file if required - if not utils.check_file(model_file, force, - algorithm.min_model_file_size): - - enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted') - if allow_missing_files: - enroll_files = utils.filter_missing_files(enroll_files) - if not enroll_files: - logger.debug("... Skipping model file %s since no feature file could be found", model_file) - continue - - logger.debug("... Enrolling model '%s' from %d feature(s) to " - "file '%s' (%d/%d)", model_id, len(enroll_files), model_file, - pos+1, len(model_ids)) - bob.io.base.create_directories_safe(os.path.dirname(model_file)) - - # load all files into memory - enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files] - - if has_metadata: - metadata = fs.database.enroll_files(group=group, model_id=model_id) - model = algorithm.enroll(enroll_features, metadata=metadata) - else: - model = algorithm.enroll(enroll_features) - - if model is None: - if allow_missing_files: - logger.debug("... Enrollment for model %s failed; skipping", model_id) - continue - else: - raise RuntimeError("Enrollemnt of model '%s' was not successful" % model_id) - - # save the model - algorithm.write_model(model, model_file) - - else: - logger.debug("... Skipping model file '%s' since it exists", model_file) - - - # T-Norm-Models - if 'T' in types and compute_zt_norm: - for group in groups: - t_model_ids = fs.t_model_ids(group) - - if indices is not None: - t_model_ids = t_model_ids[indices[0]:indices[1]] - logger.info("- Enrollment: splitting of index range %s", str(indices)) - - logger.info("- Enrollment: enrolling T-models of group '%s'", group) - for t_model_id in t_model_ids: - # Path to the model - t_model_file = fs.t_model_file(t_model_id, group) - - # Removes old file if required - if not utils.check_file(t_model_file, force, - algorithm.min_model_file_size): - t_enroll_files = fs.t_enroll_files(t_model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted') - - if allow_missing_files: - t_enroll_files = utils.filter_missing_files(t_enroll_files) - if not t_enroll_files: - logger.debug("... Skipping T-model file %s since no feature file could be found", t_model_file) - continue - - logger.debug("... Enrolling T-model from %d features to file '%s'", len(t_enroll_files), t_model_file) - bob.io.base.create_directories_safe(os.path.dirname(t_model_file)) - - # load all files into memory - t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files] - - if has_metadata: - metadata = fs.database.enroll_files(group=group, model_id=t_model_id) - t_model = algorithm.enroll(t_enroll_features, metadata=metadata) - else: - t_model = algorithm.enroll(t_enroll_features) - - if t_model is None: - if allow_missing_files: - logger.debug("... Enrollment for T-model %s failed; skipping", t_model_id) - continue - else: - raise RuntimeError("Enrollemnt of T-model '%s' was not successful", t_model_id) - - # save model - algorithm.write_model(t_model, t_model_file) - else: - logger.debug("... Skipping T-model file '%s' since it exists", t_model_file) diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py deleted file mode 100644 index d8bb712f..00000000 --- a/bob/bio/base/tools/command_line.py +++ /dev/null @@ -1,626 +0,0 @@ -"""Execute biometric recognition algorithms on a certain biometric database. -""" - -import argparse -import os -import socket -import sys -import six - -import bob.core -import bob.extension - -logger = bob.core.log.setup("bob.bio.base") - -from .. import utils -from . import FileSelector - - -def is_idiap(): - return os.path.isdir("/idiap") and "USER" in os.environ - - -def command_line_config_group(parser, package_prefix='bob.bio.', exclude_resources_from=[]): - """ - Generic configuration command lines that can be used by different toolchains, e.g., in bob.bio or bob.pad. - :param parser: Parser to which this argument group should be added - :param package_prefix: prefix of a package, in which these arguments should be use, e.g., in bob.bio. or bob.pad. - :param exclude_resources_from: resources that should be excluded from the commandline - :return: new config argument group added to the parser - """ - - config_group = parser.add_argument_group( - '\nParameters defining the experiment. Most of these parameters can be a registered ' - 'resource, a configuration file, or even a string that defines a newly created object') - config_group.add_argument('configuration_file', metavar='PATH', nargs='*', - help='A configuration file containing one or more of "database", "preprocessor", ' - '"extractor", "algorithm" and/or "grid"') - config_group.add_argument('-H', '--create-configuration-file', metavar='PATH', - help='If selected, an empty configuration file will be created, and no further process is executed') - config_group.add_argument('-d', '--database', metavar='x', nargs='+', - help='Database and the protocol; registered databases are: %s' % utils.resource_keys( - 'database', exclude_resources_from, package_prefix=package_prefix)) - config_group.add_argument('-p', '--preprocessor', metavar='x', nargs='+', - help='Data preprocessing; registered preprocessors are: %s' % utils.resource_keys( - 'preprocessor', exclude_resources_from, package_prefix=package_prefix)) - config_group.add_argument('-e', '--extractor', metavar='x', nargs='+', - help='Feature extraction; registered feature extractors are: %s' % utils.resource_keys( - 'extractor', exclude_resources_from, package_prefix=package_prefix)) - config_group.add_argument('-a', '--algorithm', metavar='x', nargs='+', - help='Algorithm of the experiment; registered algorithms are: %s' % utils.resource_keys( - 'algorithm', exclude_resources_from, package_prefix=package_prefix)) - config_group.add_argument('-g', '--grid', metavar='x', nargs='+', - help='Configuration for the grid setup; if not specified, the commands are executed ' - 'sequentially on the local machine; registered grid resources are %s.' % - utils.resource_keys('grid', exclude_resources_from, package_prefix=package_prefix)) - config_group.add_argument('-I', '--imports', metavar='LIB', nargs='+', default=[package_prefix + 'base'], - help='If one of your configuration files is an actual command, please specify the ' - 'lists of required libraries (imports) to execute this command') - config_group.add_argument('-W', '--preferred-package', metavar='LIB', - help='If resources with identical names are defined in several packages, prefer the ' - 'one from the given package') - config_group.add_argument('-s', '--sub-directory', metavar='DIR', - help='The sub-directory where the files of the current experiment should be stored. ' - 'Please specify a directory name with a name describing your experiment') - config_group.add_argument('--groups', metavar='GROUP', nargs='+', default=['dev'], - help="The groups (i.e., 'dev', 'eval') for which the models and scores should be " - "generated; by default, only the 'dev' group is evaluated") - config_group.add_argument('-P', '--protocol', metavar='PROTOCOL', - help='Overwrite the protocol that is stored in the database by the given one ' - '(might not by applicable for all databases).') - - config_group.add_argument('--package-prefix', default=package_prefix, help=argparse.SUPPRESS) - - return config_group - - -def command_line_parser(description=__doc__, exclude_resources_from=[]): - """command_line_parser(description=__doc__, exclude_resources_from=[]) -> parsers - - Creates an :py:class:`argparse.ArgumentParser` object that includes the minimum set of command - line options (which is not so few). - The ``description`` can be overwritten, but has a (small) default. - - Included in the parser, several groups are defined. - Each group specifies a set of command line options. - For the configurations, registered resources are listed, which can be limited by the - ``exclude_resources_from`` list of extensions. - - It returns a dictionary, containing the parser object itself (in the ``'main'`` keyword), - and a list of command line groups. - - **Parameters:** - - description : str - The documentation of the script. - - exclude_resources_from : [str] - A list of extension packages, for which resources should not be listed. - - **Returns:** - - parsers : dict - A dictionary of parser groups, with the main parser under the 'main' key. - Feel free to add more options to any of the parser groups. - """ - parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter, - conflict_handler='resolve') - - ####################################################################################### - ############## options that are required to be specified ####################### - config_group = command_line_config_group(parser, package_prefix='bob.bio.', - exclude_resources_from=exclude_resources_from) - - ####################################################################################### - ############## options to modify default directories or file names #################### - - # directories differ between idiap and extern - temp = "/idiap/temp/%s/[database-name]/[sub-directory]" % os.environ["USER"] if is_idiap() else "temp" - results = "/idiap/user/%s/[database-name]/[sub-directory]" % os.environ["USER"] if is_idiap() else "results" - database_replacement = "%s/.bob_bio_databases.txt" % os.environ["HOME"] - - dir_group = parser.add_argument_group('\nDirectories that can be changed according to your requirements') - dir_group.add_argument('-T', '--temp-directory', metavar='DIR', - help='The directory for temporary files; if --temp-directory is not specified, "%s" is used' % temp) - dir_group.add_argument('-R', '--result-directory', metavar='DIR', - help='The directory for resulting score files; if --result-directory is not specified, "%s" is used' % results) - - file_group = parser.add_argument_group('\nName (maybe including a path relative to the --temp-directory, ' - 'if not specified otherwise) of files that will be generated. ' - 'Note that not all files will be used by all algorithms') - file_group.add_argument('--extractor-file', metavar='FILE', default='Extractor.hdf5', - help='Name of the file to write the feature extractor into.') - file_group.add_argument('--projector-file', metavar='FILE', default='Projector.hdf5', - help='Name of the file to write the feature projector into.') - file_group.add_argument('--enroller-file', metavar='FILE', default='Enroller.hdf5', - help='Name of the file to write the model enroller into.') - file_group.add_argument('-G', '--gridtk-database-file', metavar='FILE', default='submitted.sql3', - help='The database file in which the submitted jobs will be written; relative to the current directory ' - '(only valid with the --grid option).') - file_group.add_argument('--experiment-info-file', metavar='FILE', default='Experiment.info', - help='The file where the configuration of all parts of the experiments are written; ' - 'relative to te --result-directory.') - file_group.add_argument('-D', '--database-directories-file', metavar='FILE', default=database_replacement, - help='An optional file, where database directories are stored (to avoid changing the database configurations)') - - sub_dir_group = parser.add_argument_group('\nSubdirectories of certain parts of the tool chain. ' - 'You can specify directories in case you want to reuse parts of ' - 'the experiments (e.g. extracted features) in other experiments. ' - 'Please note that these directories are relative to the --temp-directory, ' - 'but you can also specify absolute paths') - sub_dir_group.add_argument('--preprocessed-directory', metavar='DIR', default='preprocessed', - help='Name of the directory of the preprocessed data.') - sub_dir_group.add_argument('--extracted-directory', metavar='DIR', default='extracted', - help='Name of the directory of the extracted features.') - sub_dir_group.add_argument('--projected-directory', metavar='DIR', default='projected', - help='Name of the directory where the projected data should be stored.') - sub_dir_group.add_argument('--model-directories', metavar='DIR', nargs='+', default=['models', 'tmodels'], - help='Name of the directory where the models (and T-Norm models) should be stored') - sub_dir_group.add_argument('--score-directories', metavar='DIR', nargs='+', default=['nonorm', 'ztnorm'], - help='Name of the directory (relative to --result-directory) where to write the results to') - sub_dir_group.add_argument('--zt-directories', metavar='DIR', nargs=5, - default=['zt_norm_A', 'zt_norm_B', 'zt_norm_C', 'zt_norm_D', 'zt_norm_D_sameValue'], - help='Name of the directories (of --temp-directory) where to write the ZT-norm values; ' - 'only used with --zt-norm') - sub_dir_group.add_argument('--grid-log-directory', metavar='DIR', default='gridtk_logs', - help='Name of the directory (relative to --temp-directory) where to log files are written; ' - 'only used with --grid') - - flag_group = parser.add_argument_group('\nFlags that change the behavior of the experiment') - bob.core.log.add_command_line_option(flag_group) - flag_group.add_argument('-q', '--dry-run', action='store_true', - help='Only report the commands that will be executed, but do not execute them.') - flag_group.add_argument('-F', '--force', action='store_true', - help='Force to erase former data if already exist') - flag_group.add_argument('-U', '--write-five-column-score-files', action='store_true', - help='Writes score files in five-column format (including the model id)') - flag_group.add_argument('-Z', '--write-compressed-score-files', action='store_true', - help='Writes score files which are compressed with tar.bz2.') - flag_group.add_argument('-S', '--stop-on-failure', action='store_true', - help='Try to recursively stop the dependent jobs from the SGE grid queue, when a job failed') - flag_group.add_argument('-X', '--external-dependencies', type=int, default=[], nargs='+', - help='The jobs submitted to the grid have dependencies on the given job ids.') - flag_group.add_argument('-B', '--timer', choices=('real', 'system', 'user'), nargs='*', - help='Measure and report the time required by the execution of the tool chain (only on local machine)') - flag_group.add_argument('-L', '--run-local-scheduler', action='store_true', - help='Starts the local scheduler after submitting the jobs to the local queue (by default, ' - 'local jobs must be started by hand, e.g., using ./bin/jman --local -vv run-scheduler -x)') - flag_group.add_argument('-N', '--nice', type=int, default=10, - help='Runs the local scheduler with the given nice value') - flag_group.add_argument('-D', '--delete-jobs-finished-with-status', choices=('all', 'failure', 'success'), - help='If selected, local scheduler jobs that finished with the given status are deleted from ' - 'the --gridtk-database-file; otherwise the jobs remain in the database') - flag_group.add_argument('-C', '--calibrate-scores', action='store_true', - help='Performs score calibration after the scores are computed.') - flag_group.add_argument('-z', '--zt-norm', action='store_true', - help='Enable the computation of ZT norms') - flag_group.add_argument('-A', '--allow-missing-files', action='store_true', - help="If given, missing files will not stop the processing; this is helpful if not all files of the " - "database can be processed; missing scores will be NaN.") - flag_group.add_argument('-r', '--parallel', type=int, - help='This flag is a shortcut for running the commands on the local machine with the given amount of ' - 'parallel processes; equivalent to --grid bob.bio.base.grid.Grid("local", ' - 'number_of_parallel_processes=X) --run-local-scheduler --stop-on-failure.') - - flag_group.add_argument('-t', '--environment', dest='env', nargs='*', default=[], - help='Passes specific environment variables to the job.') - - return { - 'main': parser, - 'config': config_group, - 'dir': dir_group, - 'sub-dir': sub_dir_group, - 'file': file_group, - 'flag': flag_group - } - - -def command_line_skip_group(parsers, command_line_parameters, skips): - # add execute-only flags to command line options - if skips is not None: - ####################################################################################### - ################# options for skipping parts of the toolchain ######################### - skip_group = parsers['main'].add_argument_group( - '\nFlags that allow to skip certain parts of the experiments. This does only make sense when the ' - 'generated files are already there (e.g. when reusing parts of other experiments)') - for skip in skips: - skip_group.add_argument('--skip-%s' % skip, action='store_true', help='Skip the %s step.' % skip) - skip_group.add_argument('-o', '--execute-only', nargs='+', choices=skips, - help='If specified, executes only the given parts of the tool chain.') - - # parse the arguments - parser = parsers['main'] - args = parser.parse_args(command_line_parameters) - return args - - -def take_from_config_or_command_line(args, config, keyword, default, required=True, is_resource=True): - if getattr(args, keyword) is not None and getattr(args, keyword) != default: - if is_resource: - setattr(args, keyword, utils.load_resource(' '.join(getattr(args, keyword)), keyword, - imports=args.imports, package_prefix=args.package_prefix, - preferred_package=args.preferred_package)) - - elif config is not None and hasattr(config, keyword): - val = getattr(config, keyword) - if isinstance(val, six.string_types) and is_resource: - val = utils.load_resource(val, keyword, imports=args.imports, package_prefix=args.package_prefix, - preferred_package=args.preferred_package) - setattr(args, keyword, val) - - elif default is not None: - if is_resource: - setattr(args, keyword, utils.load_resource(' '.join(default), keyword, - imports=args.imports, package_prefix=args.package_prefix, - preferred_package=args.preferred_package)) - - elif required: - raise ValueError("Please specify '%s' either on command line (via '--%s') or in a configuration file" % - (keyword, keyword.replace("_","-"))) - - if config is not None and hasattr(config, keyword): - setattr(config, keyword, None) - - -def check_config_consumed(config): - if config is not None: - import inspect - for keyword in dir(config): - if not keyword.startswith('_') and not keyword.isupper(): - attr = getattr(config, keyword) - if attr is not None and not inspect.isclass(attr) and not inspect.ismodule(attr): - logger.warn("The variable '%s' in a configuration file is not known or not supported by this application; use a '_' prefix to the variable name (e.g., '_%s') to suppress this warning", keyword, keyword) - - -def parse_config_file(parsers, args, args_dictionary, keywords, skips): - parser = parsers['main'] - # check if the "create_configuration_file" function was requested - if args.create_configuration_file is not None: - # update list of options to be written into the config file - set_required_common_optional_arguments(required=args_dictionary['required'], - common=args_dictionary['common'], - optional=args_dictionary['optional']) - # this will exit at the end - create_configuration_file(parsers, args) - - # first, read the configuration file and set everything from the config file to the args -- as - # long as not overwritten on command line - config = utils.read_config_file(args.configuration_file) if args.configuration_file else None - for keyword in ("database", "preprocessor", "extractor", "algorithm"): - take_from_config_or_command_line(args, config, keyword, - parser.get_default(keyword)) - - take_from_config_or_command_line(args, config, "grid", - parser.get_default('grid'), required=False) - - take_from_config_or_command_line(args, config, "sub_directory", - parser.get_default("sub_directory"), is_resource=False) - - take_from_config_or_command_line(args, config, "env", - parser.get_default("env"), is_resource=False) - - skip_keywords = tuple(['skip_' + k.replace('-', '_') for k in skips]) - - for keyword in keywords + skip_keywords + ('execute_only',): - take_from_config_or_command_line(args, config, keyword, - parser.get_default(keyword), required=False, is_resource=False) - - # check that all variables in the config file are consumed by the above options - check_config_consumed(config) - - # evaluate skips - if skips is not None and args.execute_only is not None: - for skip in skips: - if skip not in args.execute_only: - setattr(args, "skip_%s" % skip.replace("-", "_"), True) - return args - - -def set_extra_flags(args): - if args.parallel is not None: - args.grid = bob.bio.base.grid.Grid("local", number_of_parallel_processes=args.parallel) - args.run_local_scheduler = True - args.stop_on_failure = True - - # logging - bob.core.log.set_verbosity_level(logger, args.verbose) - - # timer - if args.timer is not None and not len(args.timer): - args.timer = ('real', 'system', 'user') - - # set base directories - if args.temp_directory is None: - args.temp_directory = "/idiap/temp/%s/%s" % (os.environ["USER"], args.database.name) if is_idiap() else "temp" - if args.result_directory is None: - args.result_directory = "/idiap/user/%s/%s" % ( - os.environ["USER"], args.database.name) if is_idiap() else "results" - - args.temp_directory = os.path.join(args.temp_directory, args.sub_directory) - args.result_directory = os.path.join(args.result_directory, args.sub_directory) - args.grid_log_directory = os.path.join(args.temp_directory, args.grid_log_directory) - - return args - - -def initialize(parsers, command_line_parameters=None, skips=[]): - """initialize(parsers, command_line_parameters = None, skips = []) -> args - - Parses the command line and arranges the arguments accordingly. - Afterward, it loads the resources for the database, preprocessor, extractor, algorithm and grid (if specified), - and stores the results into the returned args. - - This function also initializes the :py:class:`FileSelector` instance by arranging the directories and - files according to the command line parameters. - - If the ``skips`` are given, an '--execute-only' parameter is added to the parser, according skips are selected. - - **Parameters:** - - parsers : dict - The dictionary of command line parsers, as returned from :py:func:`command_line_parser`. - Additional arguments might have been added. - - command_line_parameters : [str] or None - The command line parameters that should be interpreted. - By default, the parameters specified by the user on command line are considered. - - skips : [str] - A list of possible ``--skip-...`` options to be added and evaluated automatically. - - **Returns:** - - args : namespace - A namespace of arguments as read from the command line. - - .. note:: The database, preprocessor, extractor, algorithm and grid (if specified) are actual instances - of the according classes. - - """ - - from bob.bio.base.database import BioDatabase - - args = command_line_skip_group(parsers, command_line_parameters, skips) - args_dictionary = {'required': ['database', 'preprocessor', 'extractor', 'algorithm', 'sub_directory'], - 'common': ['protocol', 'grid', 'parallel', 'verbose', 'groups', 'temp_directory', - 'result_directory', 'zt_norm', 'allow_missing_files', 'dry_run', 'force'], - 'optional': ['preprocessed_directory', 'extracted_directory', 'projected_directory', - 'model_directories', 'extractor_file', 'projector_file', 'enroller_file'] - } - keywords = ( - "protocol", - "groups", - "parallel", - "preferred_package", - "temp_directory", - "result_directory", - "extractor_file", - "projector_file", - "enroller_file", - "gridtk_database_file", - "experiment_info_file", - "database_directories_file", - "preprocessed_directory", - "extracted_directory", - "projected_directory", - "model_directories", - "score_directories", - "zt_directories", - "grid_log_directory", - "verbose", - "dry_run", - "force", - "write_compressed_score_files", - "stop_on_failure", - "run_local_scheduler", - "external_dependencies", - "timer", - "nice", - "delete_jobs_finished_with_status", - "calibrate_scores", - "zt_norm", - "allow_missing_files", - "env", - ) - args = parse_config_file(parsers, args, args_dictionary, keywords, skips) - - args = set_extra_flags(args) - - # protocol command line override - if args.protocol is not None: - args.database.protocol = args.protocol - - protocol = 'None' if args.database.protocol is None else args.database.protocol - - # result files - args.info_file = os.path.join(args.result_directory, protocol, args.experiment_info_file) - - # sub-directories that depend on the database - extractor_sub_dir = protocol if args.database.training_depends_on_protocol and \ - args.extractor.requires_training else '.' - projector_sub_dir = protocol if args.database.training_depends_on_protocol and \ - args.algorithm.requires_projector_training else extractor_sub_dir - enroller_sub_dir = protocol if args.database.training_depends_on_protocol and \ - args.algorithm.requires_enroller_training else projector_sub_dir - model_sub_dir = protocol if args.database.models_depend_on_protocol else enroller_sub_dir - - # Database directories, which should be automatically replaced - if isinstance(args.database, BioDatabase): - args.database.replace_directories(args.database_directories_file) - - # initialize the file selector - FileSelector.create( - database=args.database, - extractor_file=os.path.join(args.temp_directory, extractor_sub_dir, args.extractor_file), - projector_file=os.path.join(args.temp_directory, projector_sub_dir, args.projector_file), - enroller_file=os.path.join(args.temp_directory, enroller_sub_dir, args.enroller_file), - - preprocessed_directory=os.path.join(args.temp_directory, args.preprocessed_directory), - extracted_directory=os.path.join(args.temp_directory, extractor_sub_dir, args.extracted_directory), - projected_directory=os.path.join(args.temp_directory, projector_sub_dir, args.projected_directory), - model_directories=[os.path.join(args.temp_directory, model_sub_dir, m) for m in args.model_directories], - score_directories=[os.path.join(args.result_directory, protocol, z) for z in args.score_directories], - zt_score_directories=[os.path.join(args.temp_directory, protocol, s) for s in args.zt_directories], - compressed_extension='.tar.bz2' if args.write_compressed_score_files else '', - default_extension='.hdf5', - zt_norm = args.zt_norm - ) - - return args - - -def groups(args): - """groups(args) -> groups - - Returns the groups, for which the files must be preprocessed, and features must be extracted and projected. - This function should be used in order to eliminate the training files (the ``'world'`` group), - when no training is required in this experiment. - - **Parameters:** - - args : namespace - The interpreted command line arguments as returned by the :py:func:`initialize` function. - - **Returns:** - - groups : [str] - A list of groups, for which data needs to be treated. - """ - groups = args.groups[:] - if args.extractor.requires_training or args.algorithm.requires_projector_training or \ - args.algorithm.requires_enroller_training: - groups.append('world') - return groups - - -def command_line(cmdline): - """command_line(cmdline) -> str - - Converts the given options to a string that can be executed in a terminal. - Parameters are enclosed into ``'...'`` quotes so that the command line can interpret them (e.g., - if they contain spaces or special characters). - - **Parameters:** - - cmdline : [str] - A list of command line options to be converted into a string. - - **Returns:** - - str : str - The command line string that can be copy-pasted into the terminal. - """ - c = "" - for cmd in cmdline: - if cmd[0] in '/-': - c += "%s " % cmd - else: - c += "'%s' " % cmd - return c - - -def write_info(args, command_line_parameters, executable): - """Writes information about the current experimental setup into a file specified on command line. - - **Parameters:** - - args : namespace - The interpreted command line arguments as returned by the :py:func:`initialize` function. - - command_line_parameters : [str] or ``None`` - The command line parameters that have been interpreted. - If ``None``, the parameters specified by the user on command line are considered. - - executable : str - The name of the executable (such as ``'./bin/verify.py'``) that is used to run the experiments. - """ - if command_line_parameters is None: - command_line_parameters = sys.argv[1:] - # write configuration - try: - bob.io.base.create_directories_safe(os.path.dirname(args.info_file)) - f = open(args.info_file, 'w') - f.write("Command line:\n") - f.write(command_line([executable] + command_line_parameters) + "\n\n") - f.write("Host: %s\n" % socket.gethostname()) - f.write("Configuration:\n") - f.write("Database:\n%s\n\n" % args.database) - f.write("Preprocessor:\n%s\n\n" % args.preprocessor) - f.write("Extractor:\n%s\n\n" % args.extractor) - f.write("Algorithm:\n%s\n\n" % args.algorithm) - except IOError: - logger.error("Could not write the experimental setup into file '%s'", args.info_file) - - -global _required_list, _common_list, _optional_list -_required_list = set() -_common_list = set() -_optional_list = set() - - -def set_required_common_optional_arguments(required=[], common=[], optional=[]): - _required_list.update(required) - _common_list.update(common) - _optional_list.update(optional) - - -def create_configuration_file(parsers, args): - """This function writes an empty configuration file with all possible options.""" - logger.info("Writing configuration file %s", args.create_configuration_file) - import datetime - executables = bob.extension.find_executable(os.path.basename(sys.argv[0]), - prefixes=[os.path.dirname(sys.argv[0]), 'bin']) - if not executables: - executables = [sys.argv[0]] - - parser = parsers['main'] - - bob.io.base.create_directories_safe(os.path.dirname(args.create_configuration_file)) - - required = "# Configuration file automatically generated at %s for %s.\n\n" % ( - datetime.date.today(), executables[0]) - required += "##################################################\n" \ - "############### REQUIRED ARGUMENTS ###############\n" \ - "##################################################\n\n" - required += "# These arguments need to be set.\n\n\n" - common = "##################################################\n" \ - "################ COMMON ARGUMENTS ################\n" \ - "##################################################\n\n" - common += "# These arguments are commonly changed.\n\n\n" - optional = "##################################################\n" \ - "############### OPTIONAL ARGUMENTS ###############\n" \ - "##################################################\n\n" - optional += "# Files and directories might commonly be specified with absolute paths or " \ - "relative to the temp_directory.\n# Change these options, e.g., to reuse parts " \ - "of other experiments.\n\n\n" - rare = "##################################################\n" \ - "############ RARELY CHANGED ARGUMENTS ############\n" \ - "##################################################\n\n\n" - - with open(args.create_configuration_file, 'w') as f: - - for action in parser._actions[3:]: - if action.help == "==SUPPRESS==": - continue - - tmp = "# %s\n\n" % action.help - if action.nargs is None and action.type is None and action.default is not None: - tmp += "#%s = '%s'\n\n\n" % (action.dest, action.default) - else: - tmp += "#%s = %s\n\n\n" % (action.dest, action.default) - - if action.dest in _required_list: - required += tmp - elif action.dest in _common_list: - common += tmp - elif action.dest in _optional_list: - optional += tmp - else: - rare += tmp - - f.write(required) - f.write(common) - f.write(optional) - f.write(rare) - - parser.exit(1, "Configuration file '%s' was written; exiting\n" % args.create_configuration_file) diff --git a/bob/bio/base/tools/extractor.py b/bob/bio/base/tools/extractor.py deleted file mode 100644 index 7f822c49..00000000 --- a/bob/bio/base/tools/extractor.py +++ /dev/null @@ -1,179 +0,0 @@ -import bob.io.base -import os - -import logging -import inspect -logger = logging.getLogger("bob.bio.base") - -from .FileSelector import FileSelector -from .preprocessor import read_preprocessed_data -from .. import utils - -def train_extractor(extractor, preprocessor, allow_missing_files = False, force = False): - """Trains the feature extractor using preprocessed data of the ``'world'`` group, if the feature extractor requires training. - - This function should only be called, when the ``extractor`` actually requires training. - The given ``extractor`` is trained using preprocessed data. - It writes the extractor to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if the target file already exist, it is not re-created. - - **Parameters:** - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor to be trained. - - preprocessor : py:class:`bob.bio.base.preprocessor.Preprocessor` or derived - The preprocessor, used for reading the preprocessed data. - - allow_missing_files : bool - If set to ``True``, preprocessed data files that are not found are silently ignored during training. - - force : bool - If given, the extractor file is regenerated, even if it already exists. - """ - - if not extractor.requires_training: - logger.warn("The train_extractor function should not have been called, since the extractor does not need training.") - return - - # the file selector object - fs = FileSelector.instance() - # the file to write - if utils.check_file(fs.extractor_file, force, - extractor.min_extractor_file_size): - logger.info("- Extraction: extractor '%s' already exists.", fs.extractor_file) - else: - bob.io.base.create_directories_safe(os.path.dirname(fs.extractor_file)) - # read training files - train_files = fs.training_list('preprocessed', 'train_extractor', arrange_by_client = extractor.split_training_data_by_client) - train_data = read_preprocessed_data(train_files, preprocessor, extractor.split_training_data_by_client, allow_missing_files) - if extractor.split_training_data_by_client: - logger.info("- Extraction: training extractor '%s' using %d identities:", fs.extractor_file, len(train_files)) - else: - logger.info("- Extraction: training extractor '%s' using %d training files:", fs.extractor_file, len(train_files)) - # train model - extractor.train(train_data, fs.extractor_file) - - - -def extract(extractor, preprocessor, groups=None, indices = None, allow_missing_files = False, force = False): - """Extracts features from the preprocessed data using the given extractor. - - The given ``extractor`` is used to extract all features required for the current experiment. - It writes the extracted data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if target files already exist, they are not re-created. - - The preprocessor is only used to load the data in a coherent way. - - **Parameters:** - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for extracting and writing the features. - - preprocessor : py:class:`bob.bio.base.preprocessor.Preprocessor` or derived - The preprocessor, used for reading the preprocessed data. - - groups : some of ``('world', 'dev', 'eval')`` or ``None`` - The list of groups, for which the data should be extracted. - - indices : (int, int) or None - If specified, only the features for the given index range ``range(begin, end)`` should be extracted. - This is usually given, when parallel threads are executed. - - allow_missing_files : bool - If set to ``True``, preprocessed data files that are not found are silently ignored. - - force : bool - If given, files are regenerated, even if they already exist. - """ - # the file selector object - fs = FileSelector.instance() - extractor.load(fs.extractor_file) - data_files = fs.preprocessed_data_list(groups=groups) - feature_files = fs.feature_list(groups=groups) - - if utils.is_argument_available("metadata", extractor.__call__): - metadata = fs.original_data_list(groups=groups) - else: - metadata = None - - # select a subset of indices to iterate - if indices is not None: - index_range = range(indices[0], indices[1]) - logger.info("- Extraction: splitting of index range %s" % str(indices)) - else: - index_range = range(len(data_files)) - - logger.info("- Extraction: extracting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['preprocessed'], fs.directories['extracted']) - for i in index_range: - data_file = data_files[i] - feature_file = feature_files[i] - - if not os.path.exists(data_file) and preprocessor.writes_data: - if allow_missing_files: - logger.debug("... Cannot find preprocessed data file %s; skipping", data_file) - continue - else: - logger.error("Cannot find preprocessed data file %s", data_file) - - if not utils.check_file(feature_file, force, - extractor.min_feature_file_size): - logger.debug("... Extracting features for data file '%s' (%d/%d)", - data_file, index_range.index(i)+1, len(index_range)) - # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere) - bob.io.base.create_directories_safe(os.path.dirname(feature_file)) - # load data - data = preprocessor.read_data(data_file) - - # extract feature - if metadata is None: - feature = extractor(data) - else: - feature = extractor(data, metadata=metadata[i]) - - if feature is None: - if allow_missing_files: - logger.debug("... Feature extraction for data file %s failed; skipping", data_file) - continue - else: - raise RuntimeError("Feature extraction of file '%s' was not successful" % data_file) - - - # write feature - extractor.write_feature(feature, feature_file) - else: - logger.debug("... Skipping preprocessed data '%s' since feature file '%s' exists", data_file, feature_file) - - -def read_features(file_names, extractor, split_by_client = False, allow_missing_files = False): - """read_features(file_names, extractor, split_by_client = False) -> extracted - - Reads the extracted features from ``file_names`` using the given ``extractor``. - If ``split_by_client`` is set to ``True``, it is assumed that the ``file_names`` are already sorted by client. - - **Parameters:** - - file_names : [str] or [[str]] - A list of names of files to be read. - If ``split_by_client = True``, file names are supposed to be split into groups. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for reading the extracted features. - - split_by_client : bool - Indicates if the given ``file_names`` are split into groups. - - allow_missing_files : bool - If set to ``True``, extracted files that are not found are silently ignored. - - **Returns:** - - extracted : [object] or [[object]] - The list of extracted features, in the same order as in the ``file_names``. - """ - file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files) - - if split_by_client: - return [[extractor.read_feature(f) for f in client_files] for client_files in file_names] - else: - return [extractor.read_feature(f) for f in file_names] diff --git a/bob/bio/base/tools/grid.py b/bob/bio/base/tools/grid.py deleted file mode 100644 index 909ac209..00000000 --- a/bob/bio/base/tools/grid.py +++ /dev/null @@ -1,134 +0,0 @@ -from __future__ import print_function - -import sys -import os -import math -from .. import grid -from .command_line import command_line - -import bob.core -import logging -logger = logging.getLogger("bob.bio.base") - -def indices(list_to_split, number_of_parallel_jobs, task_id=None): - """This function returns the first and last index for the files for the current job ID. - If no job id is set (e.g., because a sub-job is executed locally), it simply returns all indices.""" - - if number_of_parallel_jobs is None or number_of_parallel_jobs == 1: - return None - - # test if the 'SEG_TASK_ID' environment is set - sge_task_id = os.getenv('SGE_TASK_ID') if task_id is None else task_id - if sge_task_id is None: - # task id is not set, so this function is not called from a grid job - # hence, we process the whole list - return (0,len(list_to_split)) - else: - job_id = int(sge_task_id) - 1 - # compute number of files to be executed - number_of_objects_per_job = int(math.ceil(float(len(list_to_split) / float(number_of_parallel_jobs)))) - start = job_id * number_of_objects_per_job - end = min((job_id + 1) * number_of_objects_per_job, len(list_to_split)) - return (start, end) - - -class GridSubmission (object): - def __init__(self, args, command_line_parameters, executable = 'verify.py', first_fake_job_id = 0): - # find, where the executable is installed - import bob.extension - - if command_line_parameters is None: - command_line_parameters = sys.argv[1:] - - executables = bob.extension.find_executable(executable, prefixes = [os.path.dirname(sys.argv[0]), 'bin']) - if not len(executables): - raise IOError("Could not find the '%s' executable." % executable) - executable = executables[0] - assert os.path.isfile(executable) - self.executable = executable - - if args.grid is not None: - assert isinstance(args.grid, grid.Grid) - - self.env = args.env #Fetching the enviroment variable - - # find, where jman is installed - jmans = bob.extension.find_executable('jman', prefixes = ['bin']) - if not len(jmans): - raise IOError("Could not find the 'jman' executable. Have you installed GridTK?") - jman = jmans[0] - assert os.path.isfile(jman) - - self.args = args - self.command_line = [p for p in command_line_parameters if not p.startswith('--skip') and p not in ('-q', '--dry-run')] - self.fake_job_id = first_fake_job_id - - import gridtk - # setup logger - bob.core.log.set_verbosity_level(bob.core.log.setup("gridtk"), min(args.verbose,2)) - Manager = gridtk.local.JobManagerLocal if args.grid.is_local() else gridtk.sge.JobManagerSGE - self.job_manager = Manager(database = args.gridtk_database_file, wrapper_script=jman) - self.submitted_job_ids = [] - - - def submit(self, command, number_of_parallel_jobs = 1, dependencies=[], name = None, **kwargs): - """Submit a grid job with the given command, which is added to the default command line. - If the name is not given, it will take the second parameter of the ``command`` as name. - """ - dependencies = dependencies + self.args.external_dependencies - - # create the command to be executed - cmd = [self.executable] + self.command_line - cmd += command.split() - - # if no job name is specified, create one - if name is None: - name = command.split()[1] - # generate log directory - log_dir = os.path.join(self.args.grid_log_directory, name) - - # generate job array - if number_of_parallel_jobs > 1: - array = (1,number_of_parallel_jobs,1) - else: - array = None - - # submit the job to the job manager - if not self.args.dry_run: - if(self.env is not None): - kwargs['env'] = self.env - - job_id = self.job_manager.submit( - command_line = cmd, - name = name, - array = array, - dependencies = dependencies, - log_dir = log_dir, - stop_on_failure = self.args.stop_on_failure, - **kwargs - ) - logger.info("submitted: job '%s' with id '%d' and dependencies '%s'" % (name, job_id, dependencies)) - self.submitted_job_ids.append(job_id) - return job_id - else: - self.fake_job_id += 1 - print ('would have submitted job', name, 'with id', self.fake_job_id, 'with parameters', kwargs, end='') - if array: - print (' using', array[1], 'parallel jobs', end='') - print (' as:', command_line(cmd), '\nwith dependencies', dependencies) - return self.fake_job_id - - - def execute_local(self): - """Starts the local deamon and waits until it has finished.""" - logger.info("Starting jman deamon to run the jobs on the local machine.") - failures = self.job_manager.run_scheduler(job_ids=self.submitted_job_ids, parallel_jobs=self.args.grid.number_of_parallel_processes, sleep_time=self.args.grid.scheduler_sleep_time, die_when_finished=True, nice=self.args.nice) - if failures: - logger.error("The jobs with the following IDS did not finish successfully: '%s'.", ', '.join([str(f) for f in failures])) - self.job_manager.report(job_ids = failures[:1], output=False) - - # delete the jobs that we have added - if self.args.delete_jobs_finished_with_status is not None: - logger.info("Deleting jman jobs that we have added") - status = ('success', 'failure') if self.args.delete_jobs_finished_with_status == 'all' else (self.args.delete_jobs_finished_with_status,) - self.job_manager.delete(job_ids=self.submitted_job_ids, status=status) diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py deleted file mode 100644 index 93be4116..00000000 --- a/bob/bio/base/tools/preprocessor.py +++ /dev/null @@ -1,138 +0,0 @@ -import bob.io.base -import os - -import logging -import inspect -logger = logging.getLogger("bob.bio.base") - -from .FileSelector import FileSelector -from .. import utils - - -def preprocess(preprocessor, groups = None, indices = None, allow_missing_files = False, force = False): - """Preprocesses the original data of the database with the given preprocessor. - - The given ``preprocessor`` is used to preprocess all data required for the current experiment. - It writes the preprocessed data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`. - By default, if target files already exist, they are not re-created. - - **Parameters:** - - preprocessor : py:class:`bob.bio.base.preprocessor.Preprocessor` or derived - The preprocessor, which should be applied to all data. - - groups : some of ``('world', 'dev', 'eval')`` or ``None`` - The list of groups, for which the data should be preprocessed. - - indices : (int, int) or None - If specified, only the data for the given index range ``range(begin, end)`` should be preprocessed. - This is usually given, when parallel threads are executed. - - allow_missing_files : bool - If set to ``True``, files for which the preprocessor returns ``None`` are silently ignored. - - force : bool - If given, files are regenerated, even if they already exist. - """ - if not preprocessor.writes_data: - # The preprocessor does not write anything, so no need to call it - logger.info("Skipping preprocessing as preprocessor does not write any data") - return - - # the file selector object - fs = FileSelector.instance() - - # get the file lists - data_files = fs.original_data_list(groups=groups) - original_directory, original_extension = fs.original_directory_and_extension() - preprocessed_data_files = fs.preprocessed_data_list(groups=groups) - - if utils.is_argument_available("metadata", preprocessor.__call__): - metadata = fs.original_data_list(groups=groups) - else: - metadata = None - - # select a subset of keys to iterate - if indices is not None: - index_range = range(indices[0], indices[1]) - logger.info("- Preprocessing: splitting of index range %s", str(indices)) - else: - index_range = range(len(data_files)) - - logger.info("- Preprocessing: processing %d data files from directory '%s' to directory '%s'", len(index_range), fs.directories['original'], fs.directories['preprocessed']) - - # read annotation files - annotation_list = fs.annotation_list(groups=groups) - - # iterate over the selected files - for i in index_range: - preprocessed_data_file = preprocessed_data_files[i] - file_object = data_files[i] - file_name = file_object.make_path(original_directory, original_extension) - - # check for existence - if not utils.check_file(preprocessed_data_file, force, - preprocessor.min_preprocessed_file_size): - logger.debug("... Processing original data file '%s' (%d/%d)", file_name, - index_range.index(i)+1, len(index_range)) - - data = preprocessor.read_original_data(file_object, original_directory, original_extension) - # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere) - bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file)) - - # get the annotations; might be None - annotations = fs.get_annotations(annotation_list[i]) - - # call the preprocessor - if metadata is None: - preprocessed_data = preprocessor(data, annotations) - else: - preprocessed_data = preprocessor(data, annotations, metadata=metadata[i]) - - if preprocessed_data is None: - if allow_missing_files: - logger.debug("... Processing original data file '%s' was not successful", file_name) - continue - else: - raise RuntimeError("Preprocessing of file '%s' was not successful" % file_name) - - # write the data - preprocessor.write_data(preprocessed_data, preprocessed_data_file) - - else: - logger.debug("... Skipping original data file '%s' since preprocessed data '%s' exists", file_name, preprocessed_data_file) - - -def read_preprocessed_data(file_names, preprocessor, split_by_client = False, allow_missing_files = False): - """read_preprocessed_data(file_names, preprocessor, split_by_client = False) -> preprocessed - - Reads the preprocessed data from ``file_names`` using the given preprocessor. - If ``split_by_client`` is set to ``True``, it is assumed that the ``file_names`` are already sorted by client. - - **Parameters:** - - file_names : [str] or [[str]] - A list of names of files to be read. - If ``split_by_client = True``, file names are supposed to be split into groups. - - preprocessor : py:class:`bob.bio.base.preprocessor.Preprocessor` or derived - The preprocessor, which can read the preprocessed data. - - split_by_client : bool - Indicates if the given ``file_names`` are split into groups. - - allow_missing_files : bool - If set to ``True``, preprocessed data files that are not found are silently ignored. - - **Returns:** - - preprocessed : [object] or [[object]] - The list of preprocessed data, in the same order as in the ``file_names``. - """ - file_names = utils.filter_missing_files(file_names, split_by_client, allow_missing_files and preprocessor.writes_data) - - if split_by_client: - preprocessed = [[preprocessor.read_data(f) for f in client_files] for client_files in file_names] - else: - preprocessed = [preprocessor.read_data(f) for f in file_names] - return utils.filter_none(preprocessed, split_by_client) diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py deleted file mode 100644 index eb634b96..00000000 --- a/bob/bio/base/tools/scoring.py +++ /dev/null @@ -1,596 +0,0 @@ -import bob.io.base -import bob.learn.em -import bob.learn.linear -import numpy -import os, sys -import tarfile -import inspect - -import logging -logger = logging.getLogger("bob.bio.base") - -from .FileSelector import FileSelector -from .. import utils -from .. import score - -def _scores(algorithm, reader, model, probe_objects, allow_missing_files): - """Compute scores for the given model and a list of probes. - """ - # the file selector object - fs = FileSelector.instance() - # get probe files - probes = fs.get_paths(probe_objects, 'projected' if algorithm.performs_projection else 'extracted') - # the scores to be computed; initialized with NaN - scores = numpy.ones((1,len(probes)), numpy.float64) * numpy.nan - - if allow_missing_files and model is None: - # if we have no model, all scores are undefined - return scores - - # Checking if we need to ship the metadata in the scoring method - has_metadata = utils.is_argument_available("metadata", algorithm.score) - - # Loops over the probe sets - for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects): - if fs.uses_probe_file_sets(): - assert isinstance(probe_element, list) - # filter missing files - if allow_missing_files: - probe_element = utils.filter_missing_files(probe_element, probe_objects) - if not probe_element: - # we keep the NaN score - continue - # read probe from probe_set - probe = [reader.read_feature(probe_file) for probe_file in probe_element] - # compute score - scores[0,i] = algorithm.score_for_multiple_probes(model, probe) - else: - if allow_missing_files and not os.path.exists(probe_element): - # we keep the NaN score - continue - # read probe - probe = reader.read_feature(probe_element) - - # compute score - if has_metadata: - scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata) - else: - scores[0, i] = algorithm.score(model, probe) - - # Returns the scores - return scores - - -def _open_to_read(score_file): - """Checks for the existence of the normal and the compressed version of the file, and calls :py:func:`bob.bio.base.score.open_file` for the existing one.""" - if not os.path.exists(score_file): - score_file += '.tar.bz2' - if not os.path.exists(score_file): - raise IOError("The score file '%s' cannot be found. Aborting!" % score_file) - return score.open_file(score_file) - - -def _open_to_write(score_file, write_compressed): - """Opens the given score file for writing. If write_compressed is set to ``True``, a file-like structure is returned.""" - bob.io.base.create_directories_safe(os.path.dirname(score_file)) - if write_compressed: - if sys.version_info[0] <= 2: - import StringIO - f = StringIO.StringIO() - else: - import io - f = io.BytesIO() - score_file += '.tar.bz2' - else: - f = open(score_file, 'w') - - return f - -def _write(f, data, write_compressed): - """Writes the given data to file, after converting it to the required type.""" - if write_compressed: - if sys.version_info[0] > 2: - data = str.encode(data) - - f.write(data) - -def _close_written(score_file, f, write_compressed): - """Closes the file f that was opened with :py:func:`_open_to_read`""" - if write_compressed: - f.seek(0) - tarinfo = tarfile.TarInfo(os.path.basename(score_file)) - tarinfo.size = len(f.buf if sys.version_info[0] <= 2 else f.getbuffer()) - tar = tarfile.open(score_file, 'w') - tar.addfile(tarinfo, f) - tar.close() - # close the file - f.close() - -def _delete(score_file, write_compressed): - """Deletes the (compressed) score_file""" - if write_compressed: - score_file += '.tar.bz2' - if os.path.isfile(score_file): - os.remove(score_file) - - -def _save_scores(score_file, scores, probe_objects, client_id, write_compressed): - """Saves the scores of one model into a text file that can be interpreted by - :py:func:`bob.bio.base.score.split_four_column`.""" - assert len(probe_objects) == scores.shape[1] - - # open file for writing - f = _open_to_write(score_file, write_compressed) - - # write scores in four-column format as string - for i, probe_object in enumerate(probe_objects): - _write(f, "%s %s %s %3.8f\n" % (str(client_id), str(probe_object.client_id), str(probe_object.path), scores[0,i]), write_compressed) - - _close_written(score_file, f, write_compressed) - - -def _scores_a(algorithm, reader, model_ids, group, compute_zt_norm, force, write_compressed, allow_missing_files): - """Computes A scores for the models with the given model_ids. If ``compute_zt_norm = False``, these are the only scores that are actually computed.""" - # the file selector object - fs = FileSelector.instance() - - if compute_zt_norm: - logger.info("- Scoring: computing score matrix A for group '%s'", group) - else: - logger.info("- Scoring: computing scores for group '%s'", group) - - # Computes the raw scores for each model - for pos, model_id in enumerate(model_ids): - # test if the file is already there - score_file = fs.a_file(model_id, group) if compute_zt_norm else fs.no_norm_file(model_id, group) - logger.debug("... Scoring model '%s' at '%s' (%d/%d)", model_id, score_file, - pos+1, len(model_ids)) - if utils.check_file(score_file, force): - logger.warn("Score file '%s' already exists.", score_file) - else: - # get probe files that are required for this model - current_probe_objects = fs.probe_objects_for_model(model_id, group) - model_file = fs.model_file(model_id, group) - if allow_missing_files and not os.path.exists(model_file): - model = None - else: - model = algorithm.read_model(model_file) - # compute scores - a = _scores(algorithm, reader, model, current_probe_objects, allow_missing_files) - - if compute_zt_norm: - # write A matrix only when you want to compute zt norm afterwards - bob.io.base.save(a, fs.a_file(model_id, group), True) - - # Save scores to text file - _save_scores(fs.no_norm_file(model_id, group), a, current_probe_objects, fs.client_id(model_id, group), write_compressed) - - -def _scores_b(algorithm, reader, model_ids, group, force, allow_missing_files): - """Computes B scores for the given model ids.""" - # the file selector object - fs = FileSelector.instance() - - # probe files: - z_probe_objects = fs.z_probe_objects(group) - - logger.info("- Scoring: computing score matrix B for group '%s'", group) - - # Loads the models - for pos, model_id in enumerate(model_ids): - # test if the file is already there - score_file = fs.b_file(model_id, group) - logger.debug("... Scoring model '%s' at '%s' (%d/%d)", model_id, - score_file, pos+1, len(model_ids)) - if utils.check_file(score_file, force): - logger.warn("Score file '%s' already exists.", score_file) - else: - model_file = fs.model_file(model_id, group) - if allow_missing_files and not os.path.exists(model_file): - model = None - else: - model = algorithm.read_model(model_file) - b = _scores(algorithm, reader, model, z_probe_objects, allow_missing_files) - bob.io.base.save(b, score_file, True) - -def _scores_c(algorithm, reader, t_model_ids, group, force, allow_missing_files): - """Computes C scores for the given t-norm model ids.""" - # the file selector object - fs = FileSelector.instance() - - # probe files: - probe_objects = fs.probe_objects(group) - - logger.info("- Scoring: computing score matrix C for group '%s'", group) - - # Computes the raw scores for the T-Norm model - for pos, t_model_id in enumerate(t_model_ids): - # test if the file is already there - score_file = fs.c_file(t_model_id, group) - logger.debug("... Scoring model '%s' at '%s' (%d/%d)", t_model_id, - score_file, pos+1, len(t_model_ids)) - if utils.check_file(score_file, force): - logger.warn("Score file '%s' already exists.", score_file) - else: - t_model_file = fs.t_model_file(t_model_id, group) - if allow_missing_files and not os.path.exists(t_model_file): - t_model = None - else: - t_model = algorithm.read_model(t_model_file) - c = _scores(algorithm, reader, t_model, probe_objects, allow_missing_files) - bob.io.base.save(c, score_file, True) - -def _scores_d(algorithm, reader, t_model_ids, group, force, allow_missing_files): - """Computes D scores for the given t-norm model ids. Both the D matrix and the D-samevalue matrix are written.""" - # the file selector object - fs = FileSelector.instance() - - # probe files: - z_probe_objects = fs.z_probe_objects(group) - - logger.info("- Scoring: computing score matrix D for group '%s'", group) - - # Gets the Z-Norm impostor samples - z_probe_ids = [z_probe_object.client_id for z_probe_object in z_probe_objects] - - # Loads the T-Norm models - for pos, t_model_id in enumerate(t_model_ids): - # test if the file is already there - score_file = fs.d_file(t_model_id, group) - logger.debug("... Scoring model '%s' at '%s' (%d/%d)", t_model_id, - score_file, pos+1, len(t_model_ids)) - same_score_file = fs.d_same_value_file(t_model_id, group) - if utils.check_file(score_file, force) and utils.check_file(same_score_file, force): - logger.warn("score files '%s' and '%s' already exist.", score_file, same_score_file) - else: - t_model_file = fs.t_model_file(t_model_id, group) - if allow_missing_files and not os.path.exists(t_model_file): - t_model = None - else: - t_model = algorithm.read_model(t_model_file) - d = _scores(algorithm, reader, t_model, z_probe_objects, allow_missing_files) - bob.io.base.save(d, score_file, True) - - t_client_id = [fs.client_id(t_model_id, group, True)] - d_same_value_tm = bob.learn.em.ztnorm_same_value(t_client_id, z_probe_ids) - bob.io.base.save(d_same_value_tm, same_score_file, True) - - -def compute_scores(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev', 'eval'], types = ['A', 'B', 'C', 'D'], write_compressed = False, allow_missing_files = False, force = False): - """Computes the scores for the given groups. - - This function computes all scores for the experiment, and writes them to files, one per model. - When ``compute_zt_norm`` is enabled, scores are computed for all four matrices, i.e. A: normal scores; B: Z-norm scores; C: T-norm scores; D: ZT-norm scores and ZT-samevalue scores. - By default, scores are computed for both groups ``'dev'`` and ``'eval'``. - - **Parameters:** - - algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived - The algorithm, used for enrolling model and writing them to file. - - extractor : py:class:`bob.bio.base.extractor.Extractor` or derived - The extractor, used for extracting the features. - The extractor is only used to read features, if the algorithm does not perform projection. - - compute_zt_norm : bool - If set to ``True``, also ZT-norm scores are computed. - - indices : (int, int) or None - If specified, scores are computed only for the models in the given index range ``range(begin, end)``. - This is usually given, when parallel threads are executed. - - .. note:: The probe files are not limited by the ``indices``. - - groups : some of ``('dev', 'eval')`` - The list of groups, for which scores should be computed. - - types : some of ``['A', 'B', 'C', 'D']`` - A list of score types to be computed. - If ``compute_zt_norm = False``, only the ``'A'`` scores are computed. - - write_compressed : bool - If enabled, score files are compressed as ``.tar.bz2`` files. - - allow_missing_files : bool - If set to ``True``, model and probe files that are not found will produce ``NaN`` scores. - - force : bool - If given, score files are regenerated, even if they already exist. - """ - # the file selector object - fs = FileSelector.instance() - - # load the projector and the enroller, if needed - if algorithm.performs_projection: - algorithm.load_projector(fs.projector_file) - algorithm.load_enroller(fs.enroller_file) - - # which tool to use to read the probes - if algorithm.performs_projection: - reader = algorithm - else: - reader = extractor - # make sure that the extractor is loaded - extractor.load(fs.extractor_file) - - for group in groups: - # get model ids - model_ids = fs.model_ids(group) - if indices is not None: - model_ids = model_ids[indices[0]:indices[1]] - logger.info("- Scoring: splitting of index range %s", str(indices)) - if compute_zt_norm: - t_model_ids = fs.t_model_ids(group) - if indices is not None: - t_model_ids = t_model_ids[indices[0]:indices[1]] - - # compute A scores - if 'A' in types: - _scores_a(algorithm, reader, model_ids, group, compute_zt_norm, force, write_compressed, allow_missing_files) - - if compute_zt_norm: - # compute B scores - if 'B' in types: - _scores_b(algorithm, reader, model_ids, group, force, allow_missing_files) - - # compute C scores - if 'C' in types: - _scores_c(algorithm, reader, t_model_ids, group, force, allow_missing_files) - - # compute D scores - if 'D' in types: - _scores_d(algorithm, reader, t_model_ids, group, force, allow_missing_files) - - - -def _c_matrix_split_for_model(selected_probe_objects, all_probe_objects, all_c_scores): - """Helper function to sub-select the c-scores in case not all probe files were used to compute A scores.""" - c_scores_for_model = numpy.empty((all_c_scores.shape[0], len(selected_probe_objects)), numpy.float64) - selected_index = 0 - for all_index in range(len(all_probe_objects)): - if selected_index < len(selected_probe_objects) and selected_probe_objects[selected_index].id == all_probe_objects[all_index].id: - c_scores_for_model[:,selected_index] = all_c_scores[:,all_index] - selected_index += 1 - assert selected_index == len(selected_probe_objects) - - # return the split database - return c_scores_for_model - -def _scores_c_normalize(model_ids, t_model_ids, group): - """Compute normalized probe scores using T-model scores.""" - # the file selector object - fs = FileSelector.instance() - - # read all tmodel scores - c_for_all = None - for t_model_id in t_model_ids: - tmp = bob.io.base.load(fs.c_file(t_model_id, group)) - if c_for_all is None: - c_for_all = tmp - else: - c_for_all = numpy.vstack((c_for_all, tmp)) - - # iterate over all models and generate C matrices for that specific model - all_probe_objects = fs.probe_objects(group) - for model_id in model_ids: - # select the correct probe files for the current model - probe_objects_for_model = fs.probe_objects_for_model(model_id, group) - c_matrix_for_model = _c_matrix_split_for_model(probe_objects_for_model, all_probe_objects, c_for_all) - # Save C matrix to file - bob.io.base.save(c_matrix_for_model, fs.c_file_for_model(model_id, group)) - -def _scores_d_normalize(t_model_ids, group): - """Compute normalized D scores for the given T-model ids""" - # the file selector object - fs = FileSelector.instance() - - # initialize D and D_same_value matrices - d_for_all = None - d_same_value = None - for t_model_id in t_model_ids: - tmp = bob.io.base.load(fs.d_file(t_model_id, group)) - tmp2 = bob.io.base.load(fs.d_same_value_file(t_model_id, group)) - if d_for_all is None and d_same_value is None: - d_for_all = tmp - d_same_value = tmp2 - else: - d_for_all = numpy.vstack((d_for_all, tmp)) - d_same_value = numpy.vstack((d_same_value, tmp2)) - - # Saves to files - bob.io.base.save(d_for_all, fs.d_matrix_file(group)) - bob.io.base.save(d_same_value, fs.d_same_value_matrix_file(group)) - - - -def zt_norm(groups = ['dev', 'eval'], write_compressed = False, allow_missing_files = False): - """Computes ZT-Norm using the previously generated A, B, C, D and D-samevalue matrix files. - - This function computes the ZT-norm scores for all model ids for all desired groups and writes them into files defined by the :py:class:`bob.bio.base.tools.FileSelector`. - It loads the A, B, C, D and D-samevalue matrix files that need to be computed beforehand. - - **Parameters:** - - groups : some of ``('dev', 'eval')`` - The list of groups, for which ZT-norm should be applied. - - write_compressed : bool - If enabled, score files are compressed as ``.tar.bz2`` files. - - allow_missing_files : bool - Currently, this option is only provided for completeness. - ``NaN`` scores are not yet handled correctly. - """ - # the file selector object - fs = FileSelector.instance() - - for group in groups: - logger.info("- Scoring: computing ZT-norm for group '%s'", group) - # list of models - model_ids = fs.model_ids(group) - t_model_ids = fs.t_model_ids(group) - - # first, normalize C and D scores - _scores_c_normalize(model_ids, t_model_ids, group) - # and normalize it - _scores_d_normalize(t_model_ids, group) - - # load D matrices only once - d = bob.io.base.load(fs.d_matrix_file(group)) - d_same_value = bob.io.base.load(fs.d_same_value_matrix_file(group)).astype(bool) - error_log_done = False - # Loops over the model ids - for model_id in model_ids: - # Loads probe files to get information about the type of access - probe_objects = fs.probe_objects_for_model(model_id, group) - - # Loads A, B, and C matrices for current model id - a = bob.io.base.load(fs.a_file(model_id, group)) - b = bob.io.base.load(fs.b_file(model_id, group)) - c = bob.io.base.load(fs.c_file_for_model(model_id, group)) - - # compute zt scores - if allow_missing_files: - # TODO: handle NaN scores, i.e., when allow_missing_files is enabled - if not error_log_done and any(numpy.any(numpy.isnan(x)) for x in (a,b,c,d,d_same_value)): - logger.error("There are NaN scores inside one of the score files for group %s; ZT-Norm will not work", group) - error_log_done = True - - zt_scores = bob.learn.em.ztnorm(a, b, c, d, d_same_value) - - # Saves to text file - _save_scores(fs.zt_norm_file(model_id, group), zt_scores, probe_objects, fs.client_id(model_id, group), write_compressed) - - - -def _concat(score_files, output, write_compressed, model_ids): - """Concatenates a list of score files into a single score file.""" - try: - f = _open_to_write(output, write_compressed) - - # Concatenates the scores - if model_ids is None: - for score_file in score_files: - i = _open_to_read(score_file) - f.write(i.read()) - else: - for score_file, model_id in zip(score_files, model_ids): - i = _open_to_read(score_file) - for l in i: - s = l.split() - s.insert(1, str(model_id)) - f.write(" ".join(s) + "\n") - - except: - logger.error("Concatenation failed; removing result file %s", output) - _close_written(output, f, write_compressed) - _delete(output, write_compressed) - raise - else: - _close_written(output, f, write_compressed) - - - -def concatenate(compute_zt_norm, groups = ['dev', 'eval'], write_compressed = False, add_model_id = False): - """Concatenates all results into one (or two) score files per group. - - Score files, which were generated per model, are concatenated into a single - score file, which can be interpreter by - :py:func:`bob.bio.base.score.load.split_four_column`. - The score files are always re-computed, regardless if they exist or not. - - **Parameters:** - - compute_zt_norm : bool - If set to ``True``, also score files for ZT-norm are concatenated. - - groups : some of ``('dev', 'eval')`` - The list of groups, for which score files should be concatenated. - - write_compressed : bool - If enabled, concatenated score files are compressed as ``.tar.bz2`` files. - """ - # the file selector object - fs = FileSelector.instance() - for group in groups: - logger.info("- Scoring: concatenating score files for group '%s'", group) - # (sorted) list of models - model_ids = fs.model_ids(group) - model_files = [fs.no_norm_file(model_id, group) for model_id in model_ids] - result_file = fs.no_norm_result_file(group) - _concat(model_files, result_file, write_compressed, model_ids if add_model_id else None) - logger.info("- Scoring: wrote score file '%s'", result_file) - - if compute_zt_norm: - model_files = [fs.zt_norm_file(model_id, group) for model_id in model_ids] - result_file = fs.zt_norm_result_file(group) - _concat(model_files, result_file, write_compressed, model_ids if add_model_id else None) - logger.info("- Scoring: wrote score file '%s'", result_file) - - -def calibrate(compute_zt_norm, groups = ['dev', 'eval'], prior = 0.5, write_compressed = False): - """Calibrates the score files by learning a linear calibration from the dev files (first element of the groups) and executing the on all groups. - - This function is intended to compute the calibration parameters on the scores of the development set using the :py:class:`bob.learn.linear.CGLogRegTrainer`. - Afterward, both the scores of the development and evaluation sets are calibrated and written to file. - For ZT-norm scores, the calibration is performed independently, if enabled. - The names of the calibrated score files that should be written are obtained from the :py:class:`bob.bio.base.tools.FileSelector`. - - .. note:: - All ``NaN`` scores in the development set are silently ignored. - This might raise an error, if **all** scores are ``NaN``. - - **Parameters:** - - compute_zt_norm : bool - If set to ``True``, also score files for ZT-norm are calibrated. - - groups : some of ``('dev', 'eval')`` - The list of groups, for which score files should be calibrated. - The first of the given groups is used to train the logistic regression parameters, while the calibration is performed for all given groups. - - prior : float - Whatever :py:class:`bob.learn.linear.CGLogRegTrainer` takes as a ``prior``. - - write_compressed : bool - If enabled, calibrated score files are compressed as ``.tar.bz2`` files. - """ - # the file selector object - fs = FileSelector.instance() - # read score files of the first group (assuming that the first group is 'dev') - norms = ['nonorm', 'ztnorm'] if compute_zt_norm else ["nonorm"] - for norm in norms: - training_score_file = fs.no_norm_result_file(groups[0]) if norm == 'nonorm' else fs.zt_norm_result_file(groups[0]) if norm == 'ztnorm' else None - - # create a LLR trainer - logger.info(" - Calibration: Training calibration for type %s from group %s", norm, groups[0]) - llr_trainer = bob.learn.linear.CGLogRegTrainer(prior, 1e-16, 100000) - - training_scores = list(score.split_four_column(training_score_file)) - for i in (0,1): - h = numpy.array(training_scores[i]) - # remove NaN's - h = h[~numpy.isnan(h)] - training_scores[i] = h[:,numpy.newaxis] - # train the LLR - llr_machine = llr_trainer.train(training_scores[0], training_scores[1]) - del training_scores - logger.debug(" ... Resulting calibration parameters: shift = %f, scale = %f", llr_machine.biases[0], llr_machine.weights[0,0]) - - # now, apply it to all groups - for group in groups: - score_file = fs.no_norm_result_file(group) if norm == 'nonorm' else fs.zt_norm_result_file(group) if norm is 'ztnorm' else None - calibrated_file = fs.calibrated_score_file(group, norm == 'ztnorm') - - logger.info(" - Calibration: calibrating scores from '%s' to '%s'", score_file, calibrated_file) - - # iterate through the score file and calibrate scores - scores = score.four_column(_open_to_read(score_file)) - - f = _open_to_write(calibrated_file, write_compressed) - - for line in scores: - assert len(line) == 4, "The line %s of score file %s cannot be interpreted" % (line, score_file) - calibrated_score = llr_machine([line[3]]) - f.write('%s %s %s %3.8f\n' % (line[0], line[1], line[2], calibrated_score[0])) - _close_written(calibrated_file, f, write_compressed) diff --git a/bob/bio/base/utils/__init__.py b/bob/bio/base/utils/__init__.py index 6738ad5e..f2e6b2d4 100644 --- a/bob/bio/base/utils/__init__.py +++ b/bob/bio/base/utils/__init__.py @@ -5,7 +5,6 @@ from .resources import * from .io import * -from .singleton import * import six import inspect import numpy diff --git a/bob/bio/base/utils/singleton.py b/bob/bio/base/utils/singleton.py deleted file mode 100644 index 5bae6ff4..00000000 --- a/bob/bio/base/utils/singleton.py +++ /dev/null @@ -1,44 +0,0 @@ -# A singleton class decorator, based on http://stackoverflow.com/a/7346105/3301902 - -class Singleton(object): - """ - A non-thread-safe helper class to ease implementing singletons. - This should be used as a **decorator** -- not a metaclass -- to the class that should be a singleton. - - The decorated class can define one `__init__` function that takes an arbitrary list of parameters. - - To get the singleton instance, use the :py:meth:`instance` method. Trying to use `__call__` will result in a `TypeError` being raised. - - Limitations: - - * The decorated class cannot be inherited from. - * The documentation of the decorated class is replaced with the documentation of this class. - """ - - def __init__(self, decorated): - self._decorated = decorated - # see: functools.WRAPPER_ASSIGNMENTS: - self.__doc__ = decorated.__doc__ - self.__name__ = decorated.__name__ - self.__module__ = decorated.__module__ - self.__mro__ = decorated.__mro__ - self.__bases__ = [] - - self._instance = None - - def create(self, *args, **kwargs): - """Creates the singleton instance, by passing the given parameters to the class' constructor.""" - self._instance = self._decorated(*args, **kwargs) - - def instance(self): - """Returns the singleton instance. - The function :py:meth:`create` must have been called before.""" - if self._instance is None: - raise RuntimeError("The class has not yet been instantiated using the 'create' method.") - return self._instance - - def __call__(self): - raise TypeError('Singletons must be accessed through the `instance()` method.') - - def __instancecheck__(self, inst): - return isinstance(inst, self._decorated) diff --git a/conda/meta.yaml b/conda/meta.yaml index ecea2f8f..44d9909b 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,16 +7,6 @@ package: build: entry_points: - - verify.py = bob.bio.base.script.verify:main - - resources.py = bob.bio.base.script.resources:resources - - databases.py = bob.bio.base.script.resources:databases - - collect_results.py = bob.bio.base.script.collect_results:main - - grid_search.py = bob.bio.base.script.grid_search:main - - preprocess.py = bob.bio.base.script.preprocess:main - - extract.py = bob.bio.base.script.extract:main - - enroll.py = bob.bio.base.script.enroll:main - - score.py = bob.bio.base.script.score:main - - fuse_scores.py = bob.bio.base.script.fuse_scores:main number: {{ environ.get('BOB_BUILD_NUMBER', 0) }} run_exports: - {{ pin_subpackage(name) }} @@ -42,6 +32,7 @@ requirements: - bob.math - bob.measure - bob.sp + - bob.pipelines - scipy {{ scipy }} - six {{ six }} run: @@ -54,16 +45,6 @@ test: imports: - {{ name }} commands: - - verify.py --help - - resources.py --help - - databases.py --help - - collect_results.py --help - - grid_search.py --help - - preprocess.py --help - - extract.py --help - - enroll.py --help - - score.py --help - - fuse_scores.py --help - bob bio --help - bob bio annotate --help - bob bio metrics --help diff --git a/requirements.txt b/requirements.txt index bf7e3e8e..67608425 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ bob.learn.linear bob.math bob.measure bob.sp +bob.pipelines click click-plugins numpy diff --git a/setup.py b/setup.py index 6f2d39ce..75b615eb 100644 --- a/setup.py +++ b/setup.py @@ -70,16 +70,6 @@ setup( # scripts should be declared using this entry: 'console_scripts' : [ - 'verify.py = bob.bio.base.script.verify:main', - 'resources.py = bob.bio.base.script.resources:resources', - 'databases.py = bob.bio.base.script.resources:databases', - 'collect_results.py = bob.bio.base.script.collect_results:main', - 'grid_search.py = bob.bio.base.script.grid_search:main', - 'preprocess.py = bob.bio.base.script.preprocess:main', - 'extract.py = bob.bio.base.script.extract:main', - 'enroll.py = bob.bio.base.script.enroll:main', - 'score.py = bob.bio.base.script.score:main', - 'fuse_scores.py = bob.bio.base.script.fuse_scores:main', ], 'bob.bio.config': [ @@ -117,15 +107,6 @@ setup( 'bic = bob.bio.base.config.algorithm.bic:algorithm', ], - 'bob.bio.grid': [ - 'local-p4 = bob.bio.base.config.grid.local:grid', - 'local-p8 = bob.bio.base.config.grid.local:grid_p8', - 'local-p16 = bob.bio.base.config.grid.local:grid_p16', - 'grid = bob.bio.base.config.grid.grid:grid', - 'demanding = bob.bio.base.config.grid.demanding:grid', - 'gpu = bob.bio.base.config.grid.gpu:grid', - ], - # declare database to bob 'bob.db': [ 'bio_filelist = bob.bio.base.database.filelist.driver:Interface', @@ -158,10 +139,12 @@ setup( 'dummy = bob.bio.base.test.dummy.annotator:annotator', ], - #baselines - 'bob.bio.baseline':[ - 'dummy = bob.bio.base.test.dummy.baseline:baseline', - ], + # run pipelines + 'bob.pipelines.cli':[ + 'vanilla-biometrics = bob.bio.base.script.vanilla_biometrics:vanilla_biometrics', + ], + + }, -- GitLab