......@@ -6,18 +6,65 @@ from import read_original_data as _read_original_data
def make_output_path(output_dir, key):
    """Returns an output path used for saving keys. You need to make sure the
    directories leading to this output path exist.

    Parameters
    ----------
    output_dir : str
        The root directory to save the results
    key : str
        The key of the sample. Usually biofile.make_path("", "")

    Returns
    -------
    str
        The path for the provided key.
    """
    return os.path.join(output_dir, key + '.hdf5')


# NOTE(review): the tail of this signature was not visible in the excerpt;
# ``original_extension`` is inferred from its use in the body -- confirm.
def load_data(biofile, read_original_data, original_directory,
              original_extension):
    """Loads the data of one biofile through the given reader.

    Parameters
    ----------
    biofile : object
        The file object handed to ``read_original_data`` as first argument.
    read_original_data : callable
        Called as ``read_original_data(biofile, original_directory,
        original_extension)``.
    original_directory : object
        Forwarded unchanged to ``read_original_data``.
    original_extension : object
        Forwarded unchanged to ``read_original_data``.

    Returns
    -------
    object
        Whatever ``read_original_data`` returns.
    """
    data = read_original_data(biofile, original_directory, original_extension)
    return data
def bio_generator(database, groups, number_of_parallel_jobs, output_dir,
read_original_data=None, biofile_to_label=None,
multiple_samples=False, force=False):
"""Returns a generator and its output types and shapes based on databases.
database : :any:``
The database that you want to use.
groups : [str]
List of groups. Can be any permutation of ``('world', 'dev', 'eval')``
number_of_parallel_jobs : int
The number of parallel jobs that the script has run with. This is used
to split the number of files into array jobs.
output_dir : str
The root directory where the data will be saved.
read_original_data : :obj:`object`, optional
A callable with the signature of
``data = read_original_data(biofile, directory, extension)``.
The module-level ``_read_original_data`` is used by default.
biofile_to_label : :obj:`object`, optional
A callable with the signature of ``label = biofile_to_label(biofile)``.
By default -1 is returned as label.
multiple_samples : bool, optional
If true, it assumes that the bio database's samples actually contain
multiple samples. This is useful for when you want to treat video
databases as image databases.
force : bool, optional
If true, all files will be overwritten.
generator : object
A generator function that when called will return the samples. The
samples will be like ``(data, label, key)``.
output_types : (object, object, object)
The types of the returned samples.
output_shapes : (tf.TensorShape, tf.TensorShape, tf.TensorShape)
The shapes of the returned samples.
if read_original_data is None:
read_original_data = _read_original_data
if biofile_to_label is None:
......@@ -35,7 +82,8 @@ def bio_generator(database, groups, number_of_parallel_jobs, output_dir,
outpath = make_output_path(output_dir, key)
if not force and os.path.isfile(outpath):
data = load_data(f, read_original_data, database)
data = read_original_data(f, database.original_directory,
# labels
if multiple_samples:
for d in data:
......@@ -44,7 +92,8 @@ def bio_generator(database, groups, number_of_parallel_jobs, output_dir,
yield (data, label, key)
# load one data to get its type and shape
data = load_data(biofiles[0], read_original_data, database)
data = read_original_data(biofiles[0], database.original_directory,
if multiple_samples:
data = data[0]
