bob / bob.bio.base · Commits · 92e6d94e

Commit 92e6d94e authored Nov 05, 2020 by Amir MOHAMMADI

Merge branch 'dask-pipelines'

Parents: 49342c2a, a0de0b05
Pipeline #44909 failed with stages in 52 seconds
Changes: 136 · Pipelines: 1
.gitignore

@@ -13,3 +13,4 @@ sphinx
 dist
 build
 record.txt
+.DS_Store
bob/bio/base/__init__.py

@@ -3,10 +3,7 @@ from . import database
 from . import preprocessor
 from . import extractor
 from . import algorithm
-from . import tools
-from . import grid  # only one file, not complete directory
 from . import annotator
-from . import baseline
 from . import script
 from . import test
bob/bio/base/algorithm/Algorithm.py

@@ -6,6 +6,8 @@
 import numpy
 import os

 from .. import utils
+import warnings


 class Algorithm(object):
     """This is the base class for all biometric recognition algorithms.

@@ -87,6 +89,13 @@ class Algorithm (object):
         min_t_model_file_size=1000,
         **kwargs  # parameters from the derived class that should be reported in the __str__() function
     ):
+        warnings.warn(
+            "`bob.bio.base.algorithm.Algorithm` will be deprecated in 01/01/2021. "
+            "Please, implement your biometric algorithm using `bob.pipelines` (https://gitlab.idiap.ch/bob/bob.pipelines).",
+            DeprecationWarning,
+        )
         self.performs_projection = performs_projection
         self.requires_projector_training = performs_projection and requires_projector_training
         self.split_training_features_by_client = split_training_features_by_client

@@ -205,10 +214,11 @@ class Algorithm (object):
         score : float
             The fused similarity between the given ``models`` and the ``probe``.
         """
         if isinstance(models, list):
-            return self.model_fusion_function([self.score(model, probe) for model in models])
+            return [self.probe_fusion_function(self.score(model, probe)) for model in models]
         elif isinstance(models, numpy.ndarray):
-            return self.model_fusion_function([self.score(models[i, :], probe) for i in range(models.shape[0])])
+            return [self.probe_fusion_function(self.score(models[i, :], probe)) for i in range(models.shape[0])]
         else:
             raise ValueError("The model does not have the desired format (list, array, ...)")
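For context, a minimal sketch of how the deprecation warning and the multiple-model scoring path above can be exercised; it assumes the last hunk belongs to `score_for_multiple_models`, and the `ToyAlgorithm` subclass with its made-up similarity function is purely illustrative, not part of this diff.

import warnings
import numpy
from bob.bio.base.algorithm import Algorithm


class ToyAlgorithm(Algorithm):
    # Hypothetical subclass: similarity is the negative Euclidean distance.
    def score(self, model, probe):
        return -numpy.linalg.norm(model - probe)


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    algorithm = ToyAlgorithm()  # the constructor now emits the DeprecationWarning added above
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Scoring one probe against several enrolled models goes through the
# list/ndarray branches shown in the last hunk.
models = numpy.random.rand(3, 5)  # three enrolled models as rows
probe = numpy.random.rand(5)
print(algorithm.score_for_multiple_models(models, probe))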
bob/bio/base/annotator/Annotator.py

@@ -9,7 +9,7 @@ class Annotator(object):
     ----------
     read_original_data : callable
         A function that loads the samples. The syntax is like
-        :any:`bob.bio.base.read_original_data`.
+        `bob.bio.base.read_original_data`.
     """

     def __init__(self, read_original_data=None, **kwargs):
bob/bio/base/baseline/Baseline.py deleted 100644 → 0

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

from .. import resource_keys, load_resource


def search_preprocessor(db_name, keys):
    """
    Wrapper that searches for preprocessors for specific databases.
    If not found, the default preprocessor is returned
    """
    for k in keys:
        if db_name.startswith(k):
            return k
    else:
        return "default"


def get_available_databases():
    """
    Get all the available databases through the database entry-points
    """
    available_databases = dict()
    all_databases = resource_keys('database', strip=[])
    for database in all_databases:
        try:
            database_entry_point = load_resource(database, 'database')

            available_databases[database] = dict()

            # Checking if the database has data for the ZT normalization
            available_databases[database]["has_zt"] = hasattr(
                database_entry_point, "zobjects"
            ) and hasattr(database_entry_point, "tobjects")

            available_databases[database]["groups"] = []
            # Searching for database groups
            try:
                groups = list(database_entry_point.groups()) or ["dev"]
                for g in ["dev", "eval"]:
                    available_databases[database]["groups"] += [g] if g in groups else []
            except Exception:
                # In case the method groups is not implemented
                available_databases[database]["groups"] = ["dev"]
        except Exception:
            pass
    return available_databases


class Baseline(object):
    """
    Base class to define baselines

    A Baseline is composed by the triplet
    :any:`bob.bio.base.preprocessor.Preprocessor`,
    :any:`bob.bio.base.extractor.Extractor`, and
    :any:`bob.bio.base.algorithm.Algorithm`

    Attributes
    ----------
    name : str
        Name of the baseline. This name will be displayed in the command line
        interface.
    preprocessors : dict
        Dictionary containing all possible preprocessors
    extractor : str
        Registered resource or a config file containing the feature extractor
    algorithm : str
        Registered resource or a config file containing the algorithm
    """

    def __init__(self, name, preprocessors, extractor, algorithm, **kwargs):
        super(Baseline, self).__init__(**kwargs)
        self.name = name
        self.preprocessors = preprocessors
        self.extractor = extractor
        self.algorithm = algorithm
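As a reference for what is being removed, a hedged sketch of how a `Baseline` instance was typically declared; the resource names used here ('filename', 'linearize', 'pca') and the baseline name are only plausible placeholders for registered preprocessor, extractor, and algorithm resources.

from bob.bio.base.baseline import Baseline  # removed by this commit

# Hypothetical baseline: maps databases to preprocessors and names the
# extractor and algorithm resources that make up the triplet.
my_baseline = Baseline(
    name="my-baseline",
    preprocessors={"default": "filename"},
    extractor="linearize",
    algorithm="pca",
)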
bob/bio/base/baseline/__init__.py deleted 100755 → 0

from .Baseline import Baseline, search_preprocessor, get_available_databases


def get_config():
    """Returns a string containing the configuration information.
    """
    import bob.extension
    return bob.extension.get_config(__name__)


# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
    """Says object was actually declared here, and not in the import module.
    Fixing sphinx warnings of not being able to find classes, when path is
    shortened. Parameters:

        *args: An iterable of objects to modify

    Resolves `Sphinx referencing issues
    <https://github.com/sphinx-doc/sphinx/issues/3048>`
    """
    for obj in args:
        obj.__module__ = __name__


__appropriate__(
    Baseline,
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
bob/bio/base/config/algorithm/pca.py

-#!/usr/bin/env python
-import bob.bio.base
-import scipy.spatial
-
-algorithm = bob.bio.base.algorithm.PCA(
-    subspace_dimension=.95,
-    distance_function=scipy.spatial.distance.euclidean,
-    is_distance_function=True
-)
+from bob.bio.base.algorithm import PCA
+
+algorithm = PCA(0.99)
\ No newline at end of file
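A hedged sketch of how a configuration resource such as the rewritten pca.py is usually consumed; `load_resource` is the helper imported by the (now deleted) Baseline.py above, and the entry-point name 'pca' is an assumption here.

from bob.bio.base import load_resource

# Load the algorithm registered under the (assumed) 'pca' entry point;
# with the new config this would yield bob.bio.base.algorithm.PCA(0.99).
algorithm = load_resource("pca", "algorithm")
print(type(algorithm))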
bob/bio/base/config/grid/__init__.py deleted 100644 → 0
bob/bio/base/config/grid/demanding.py deleted 100644 → 0

import bob.bio.base

# define a queue with demanding parameters
grid = bob.bio.base.grid.Grid(
    training_queue='32G',
    # preprocessing
    preprocessing_queue='4G-io-big',
    # feature extraction
    extraction_queue='8G-io-big',
    # feature projection
    projection_queue='8G-io-big',
    # model enrollment
    enrollment_queue='8G-io-big',
    # scoring
    scoring_queue='8G-io-big'
)
bob/bio/base/config/grid/gpu.py deleted 100644 → 0

import bob.bio.base

# define a queue with demanding parameters
grid = bob.bio.base.grid.Grid(
    training_queue='GPU',
    # preprocessing
    preprocessing_queue='4G',
    # feature extraction
    extraction_queue='GPU',
    # feature projection
    projection_queue='4G',
    # model enrollment
    enrollment_queue='4G',
    # scoring
    scoring_queue='4G'
)
bob/bio/base/config/grid/grid.py deleted 100644 → 0

import bob.bio.base

# define the queue using all the default parameters
grid = bob.bio.base.grid.Grid()
bob/bio/base/config/grid/local.py deleted 100644 → 0

import bob.bio.base

# define the queue using all the default parameters
grid = bob.bio.base.grid.Grid(
    grid_type='local',
    number_of_parallel_processes=4
)

# define a queue that is highly parallelized
grid_p8 = bob.bio.base.grid.Grid(
    grid_type='local',
    number_of_parallel_processes=8
)

# define a queue that is highly parallelized
grid_p16 = bob.bio.base.grid.Grid(
    grid_type='local',
    number_of_parallel_processes=16
)
bob/bio/base/database/__init__.py

+from .csv_dataset import CSVDatasetDevEval, CSVToSampleLoader, CSVDatasetCrossValidation
 from .file import BioFile
 from .file import BioFileSet
 from .database import BioDatabase

@@ -5,6 +6,7 @@ from .database import ZTBioDatabase
 from .filelist import FileListBioDatabase
+from . import filelist

 # gets sphinx autodoc done right - don't remove it
 def __appropriate__(*args):
     """Says object was actually declared here, and not in the import module.

@@ -25,7 +27,9 @@ __appropriate__(
     BioFile,
     BioFileSet,
     BioDatabase,
-    ZTBioDatabase,
-    FileListBioDatabase
+    ZTBioDatabase,
+    CSVDatasetDevEval,
+    CSVToSampleLoader,
+    CSVDatasetCrossValidation
 )
 __all__ = [_ for _ in dir() if not _.startswith('_')]
bob/bio/base/database/csv_dataset.py 0 → 100644

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

import os
from bob.pipelines import Sample, DelayedSample, SampleSet
import csv
import bob.io.base
import functools
from abc import ABCMeta, abstractmethod
import numpy as np
import itertools


class CSVBaseSampleLoader(metaclass=ABCMeta):
    """
    Convert CSV files in the format below to either a list of
    :any:`bob.pipelines.DelayedSample` or :any:`bob.pipelines.SampleSet`

    .. code-block:: text

       PATH,SUBJECT
       path_1,subject_1
       path_2,subject_2
       path_i,subject_j
       ...

    .. note::
       This class should be extended

    Parameters
    ----------

        data_loader:
            A python function that can be called parameterlessly, to load the
            sample in question from whatever medium

        extension:
            The file extension

    """

    def __init__(self, data_loader, dataset_original_directory="", extension=""):
        self.data_loader = data_loader
        self.extension = extension
        self.dataset_original_directory = dataset_original_directory
        self.excluding_attributes = ["_data", "load", "key"]

    @abstractmethod
    def __call__(self, filename):
        pass

    @abstractmethod
    def convert_row_to_sample(self, row, header):
        pass

    @abstractmethod
    def convert_samples_to_samplesets(self, samples, group_by_subject=True, references=None):
        pass


class CSVToSampleLoader(CSVBaseSampleLoader):
    """
    Simple mechanism to convert CSV files in the format below to either a list of
    :any:`bob.pipelines.DelayedSample` or :any:`bob.pipelines.SampleSet`
    """

    def __call__(self, filename):
        def check_header(header):
            """
            A header should have at least "SUBJECT" AND "PATH"
            """
            header = [h.lower() for h in header]
            if not "subject" in header:
                raise ValueError("The field `subject` is not available in your dataset.")

            if not "path" in header:
                raise ValueError("The field `path` is not available in your dataset.")

        with open(filename) as cf:
            reader = csv.reader(cf)
            header = next(reader)

            check_header(header)
            return [self.convert_row_to_sample(row, header) for row in reader]

    def convert_row_to_sample(self, row, header):
        path = row[0]
        subject = row[1]

        kwargs = dict([[h, r] for h, r in zip(header[2:], row[2:])])
        return DelayedSample(
            functools.partial(
                self.data_loader,
                os.path.join(self.dataset_original_directory, path + self.extension),
            ),
            key=path,
            subject=subject,
            **kwargs,
        )

    def convert_samples_to_samplesets(self, samples, group_by_subject=True, references=None):
        def get_attribute_from_sample(sample):
            return dict(
                [
                    [attribute, sample.__dict__[attribute]]
                    for attribute in list(sample.__dict__.keys())
                    if attribute not in self.excluding_attributes
                ]
            )

        if group_by_subject:
            # Grouping sample sets
            sample_sets = dict()
            for s in samples:
                if s.subject not in sample_sets:
                    sample_sets[s.subject] = SampleSet([s], **get_attribute_from_sample(s))
                else:
                    sample_sets[s.subject].append(s)
            return list(sample_sets.values())
        else:
            return [
                SampleSet([s], **get_attribute_from_sample(s), references=references)
                for s in samples
            ]
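A small self-contained sketch of `CSVToSampleLoader` in action; the temporary directory, the HDF5 arrays, and the CSV content are fabricated for illustration only.

import os
import tempfile
import numpy
import bob.io.base
from bob.bio.base.database import CSVToSampleLoader

directory = tempfile.mkdtemp()

# Two fake samples on disk plus a CSV index describing them.
for name in ("sample_1", "sample_2"):
    bob.io.base.save(numpy.random.rand(5), os.path.join(directory, name + ".hdf5"))
with open(os.path.join(directory, "dev_enroll.csv"), "w") as f:
    f.write("PATH,SUBJECT,gender\nsample_1,subject_a,F\nsample_2,subject_b,M\n")

loader = CSVToSampleLoader(
    data_loader=bob.io.base.load,
    dataset_original_directory=directory,
    extension=".hdf5",
)
samples = loader(os.path.join(directory, "dev_enroll.csv"))
print(samples[0].subject, samples[0].gender, samples[0].data.shape)

# Group the delayed samples into one SampleSet per subject.
sample_sets = loader.convert_samples_to_samplesets(samples)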
class CSVDatasetDevEval:
    """
    Generic filelist dataset for :any:`bob.bio.base.pipelines.VanillaBiometrics` pipeline.
    Check :ref:`vanilla_biometrics_features` for more details about the Vanilla Biometrics Dataset
    interface.

    To create a new dataset, you need to provide a directory structure similar to the one below:

    .. code-block:: text

       my_dataset/
       my_dataset/my_protocol/
       my_dataset/my_protocol/train.csv
       my_dataset/my_protocol/train.csv/dev_enroll.csv
       my_dataset/my_protocol/train.csv/dev_probe.csv
       my_dataset/my_protocol/train.csv/eval_enroll.csv
       my_dataset/my_protocol/train.csv/eval_probe.csv
       ...

    In the above directory structure, `my_dataset` should contain the directories with all
    evaluation protocols this dataset might have.
    The `my_protocol` directory should contain at least two csv files:

     - dev_enroll.csv
     - dev_probe.csv

    Those csv files should contain in each row i-) the path to raw data and ii-) the subject label
    for enrollment (:ref:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.Database.references`) and
    probing (:ref:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.Database.probes`).
    The structure of each CSV file should be as below:

    .. code-block:: text

       PATH,SUBJECT
       path_1,subject_1
       path_2,subject_2
       path_i,subject_j
       ...

    You might want to ship metadata within your Samples (e.g gender, age, annotation, ...).
    To do so is simple, just do as below:

    .. code-block:: text

       PATH,SUBJECT,METADATA_1,METADATA_2,METADATA_k
       path_1,subject_1,A,B,C
       path_2,subject_2,A,B,1
       path_i,subject_j,2,3,4
       ...

    The files `my_dataset/my_protocol/train.csv/eval_enroll.csv` and `my_dataset/my_protocol/train.csv/eval_probe.csv`
    are optional and are used in case a protocol contains data for evaluation.

    Finally, the content of the file `my_dataset/my_protocol/train.csv` is used in case a protocol
    contains data for training (:ref:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.Database.background_model_samples`)

    Parameters
    ----------

        dataset_protocol_path: str
            Absolute path of the dataset protocol description

        protocol_name: str
            The name of the protocol

        csv_to_sample_loader: :any:`CSVBaseSampleLoader`
            Base class whose objective is to generate :any:`bob.pipelines.Samples`
            and/or :any:`bob.pipelines.SampleSet` from csv rows

    """

    def __init__(
        self,
        dataset_protocol_path,
        protocol_name,
        csv_to_sample_loader=CSVToSampleLoader(
            data_loader=bob.io.base.load, dataset_original_directory="", extension=""
        ),
    ):
        def get_paths():

            if not os.path.exists(dataset_protocol_path):
                raise ValueError(f"The path `{dataset_protocol_path}` was not found")

            # TODO: Unzip file if dataset path is a zip
            protocol_path = os.path.join(dataset_protocol_path, protocol_name)
            if not os.path.exists(protocol_path):
                raise ValueError(f"The protocol `{protocol_name}` was not found")

            train_csv = os.path.join(protocol_path, "train.csv")
            dev_enroll_csv = os.path.join(protocol_path, "dev_enroll.csv")
            dev_probe_csv = os.path.join(protocol_path, "dev_probe.csv")
            eval_enroll_csv = os.path.join(protocol_path, "eval_enroll.csv")
            eval_probe_csv = os.path.join(protocol_path, "eval_probe.csv")

            # The minimum required is to have `dev_enroll_csv` and `dev_probe_csv`

            # Train
            train_csv = train_csv if os.path.exists(train_csv) else None

            # Eval
            eval_enroll_csv = (
                eval_enroll_csv if os.path.exists(eval_enroll_csv) else None
            )
            eval_probe_csv = eval_probe_csv if os.path.exists(eval_probe_csv) else None

            # Dev
            if not os.path.exists(dev_enroll_csv):
                raise ValueError(
                    f"The file `{dev_enroll_csv}` is required and it was not found"
                )

            if not os.path.exists(dev_probe_csv):
                raise ValueError(
                    f"The file `{dev_probe_csv}` is required and it was not found"
                )

            return (
                train_csv,
                dev_enroll_csv,
                dev_probe_csv,
                eval_enroll_csv,
                eval_probe_csv,
            )

        (
            self.train_csv,
            self.dev_enroll_csv,
            self.dev_probe_csv,
            self.eval_enroll_csv,
            self.eval_probe_csv,
        ) = get_paths()

        def get_dict_cache():
            cache = dict()
            cache["train"] = None
            cache["dev_enroll_csv"] = None
            cache["dev_probe_csv"] = None
            cache["eval_enroll_csv"] = None
            cache["eval_probe_csv"] = None
            return cache

        self.cache = get_dict_cache()
        self.csv_to_sample_loader = csv_to_sample_loader

    def background_model_samples(self):
        self.cache["train"] = (
            self.csv_to_sample_loader(self.train_csv)
            if self.cache["train"] is None
            else self.cache["train"]
        )

        return self.cache["train"]

    def _get_samplesets(self, group="dev", purpose="enroll", group_by_subject=False):

        fetching_probes = False
        if purpose == "enroll":
            cache_label = "dev_enroll_csv" if group == "dev" else "eval_enroll_csv"
        else:
            fetching_probes = True
            cache_label = "dev_probe_csv" if group == "dev" else "eval_probe_csv"

        if self.cache[cache_label] is not None:
            return self.cache[cache_label]

        references = None
        if fetching_probes:
            references = list(set([s.subject for s in self.references(group=group)]))

        samples = self.csv_to_sample_loader(self.__dict__[cache_label])

        sample_sets = self.csv_to_sample_loader.convert_samples_to_samplesets(
            samples, group_by_subject=group_by_subject, references=references
        )

        self.cache[cache_label] = sample_sets

        return self.cache[cache_label]

    def references(self, group="dev"):
        return self._get_samplesets(
            group=group, purpose="enroll", group_by_subject=True
        )

    def probes(self, group="dev"):
        return self._get_samplesets(
            group=group, purpose="probe", group_by_subject=False
        )
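A hedged usage sketch of `CSVDatasetDevEval`, assuming a protocol directory laid out as described in the docstring above; the paths and the protocol name below are placeholders.

import bob.io.base
from bob.bio.base.database import CSVDatasetDevEval, CSVToSampleLoader

# Placeholder paths: point them at a directory that contains
# my_protocol/dev_enroll.csv and my_protocol/dev_probe.csv
# (train.csv and eval_*.csv are optional).
dataset = CSVDatasetDevEval(
    dataset_protocol_path="/path/to/my_dataset",
    protocol_name="my_protocol",
    csv_to_sample_loader=CSVToSampleLoader(
        data_loader=bob.io.base.load,
        dataset_original_directory="/path/to/raw/data",
        extension=".hdf5",
    ),
)

references = dataset.references(group="dev")   # one SampleSet per subject
probes = dataset.probes(group="dev")           # one SampleSet per probe sample
training = dataset.background_model_samples()  # requires train.csv to be present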
class CSVDatasetCrossValidation:
    """
    Generic filelist dataset for :any:`bob.bio.base.pipelines.VanillaBiometrics` pipeline that
    handles **CROSS VALIDATION**.

    Check :ref:`vanilla_biometrics_features` for more details about the Vanilla Biometrics Dataset
    interface.