Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.learn.tensorflow
Commits
e69b911c
Commit
e69b911c
authored
Oct 24, 2017
by
Amir MOHAMMADI
Browse files
add more documentation
parent
7ea85d1b
Pipeline
#13450
passed with stages
in 29 minutes and 51 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bob/learn/tensorflow/dataset/bio.py
View file @
e69b911c
...
...
@@ -6,18 +6,65 @@ from bob.bio.base import read_original_data as _read_original_data
def
make_output_path
(
output_dir
,
key
):
return
os
.
path
.
join
(
output_dir
,
key
+
'.hdf5'
)
"""Returns an output path used for saving keys. You need to make sure the
directories leading to this output path exist.
Parameters
----------
output_dir : str
The root directory to save the results
key : str
The key of the sample. Usually biofile.make_path("", "")
def
load_data
(
biofile
,
read_original_data
,
original_directory
,
original_extension
):
data
=
read_original_data
(
biofile
,
original_directory
,
original_extension
)
return
data
Returns
-------
str
The path for the provided key.
"""
return
os
.
path
.
join
(
output_dir
,
key
+
'.hdf5'
)
def
bio_generator
(
database
,
groups
,
number_of_parallel_jobs
,
output_dir
,
read_original_data
=
None
,
biofile_to_label
=
None
,
multiple_samples
=
False
,
force
=
False
):
"""Returns a generator and its output types and shapes based on
bob.bio.base databases.
Parameters
----------
database : :any:`bob.bio.base.database.BioDatabase`
The database that you want to use.
groups : [str]
List of groups. Can be any combination of ``('world', 'dev', 'eval')``
number_of_parallel_jobs : int
The number of parallel jobs that the script has been run with. This is used
to split the list of files into array jobs.
output_dir : str
The root directory where the data will be saved.
read_original_data : :obj:`object`, optional
A callable with the signature of
``data = read_original_data(biofile, directory, extension)``.
:any:`bob.bio.base.read_original_data` is used by default.
biofile_to_label : :obj:`object`, optional
A callable with the signature of ``label = biofile_to_label(biofile)``.
By default -1 is returned as label.
multiple_samples : bool, optional
If true, it assumes that the bio database's samples actually contain
multiple samples. This is useful for when you want to treat video
databases as image databases.
force : bool, optional
If true, all files will be overwritten.
Returns
-------
generator : object
A generator function that when called will return the samples. The
samples will be like ``(data, label, key)``.
output_types : (object, object, object)
The types of the returned samples.
output_shapes : (tf.TensorShape, tf.TensorShape, tf.TensorShape)
The shapes of the returned samples.
"""
if
read_original_data
is
None
:
read_original_data
=
_read_original_data
if
biofile_to_label
is
None
:
...
...
@@ -35,7 +82,8 @@ def bio_generator(database, groups, number_of_parallel_jobs, output_dir,
outpath
=
make_output_path
(
output_dir
,
key
)
if
not
force
and
os
.
path
.
isfile
(
outpath
):
continue
data
=
load_data
(
f
,
read_original_data
,
database
)
data
=
read_original_data
(
f
,
database
.
original_directory
,
database
.
original_extension
)
# labels
if
multiple_samples
:
for
d
in
data
:
...
...
@@ -44,7 +92,8 @@ def bio_generator(database, groups, number_of_parallel_jobs, output_dir,
yield
(
data
,
label
,
key
)
# load one data to get its type and shape
data
=
load_data
(
biofiles
[
0
],
read_original_data
,
database
)
data
=
read_original_data
(
biofiles
[
0
],
database
.
original_directory
,
database
.
original_extension
)
if
multiple_samples
:
try
:
data
=
data
[
0
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment