Skip to content
Snippets Groups Projects

Update datafolder to work with bob9's videoLikeContainer file format

Open Vincent POLLET requested to merge bob_9_datafolder into master
1 file
+ 56
36
Compare changes
  • Side-by-side
  • Inline
@@ -23,7 +23,13 @@ import h5py
# ==============================================================================
def get_file_names_and_labels(files, data_folder, extension=".hdf5", hldi_type="pad"):
def get_file_names_and_labels(
files,
data_folder,
extension=".h5",
hldi_type="pad",
allow_missing_files=True,
):
"""
Get absolute names of the corresponding file objects and their class labels,
as well as keys defining name of the frame to load the data from.
@@ -39,33 +45,34 @@ def get_file_names_and_labels(files, data_folder, extension=".hdf5", hldi_type="
A directory containing the training data.
extension : str
Extension of the data files. Default: ".hdf5" .
Extension of the data files. Default: ".h5" .
hldi_type : str
Type of the high level database interface. Default: "pad".
Note: this is the only type supported at the moment.
allow_missing_files : bool
If False, a ValueError is raised when a file of the database is missing.
If True, only a "Missing file" message is printed.
Returns
-------
file_names_labels_keys : [(str, int, str)]
file_names_labels_indices : [(str, int, int)]
A list of tuples, where each tuple contains an absolute filename,
a corresponding label of the class, and a key defining the name of the
frame to extract the data from.
a corresponding label of the class, and the index of frame to extract
the data from.
"""
file_names_labels_keys = []
file_names_labels_indices = []
if hldi_type == "pad":
for f in files:
if f.attack_type is None:
label = 1
else:
label = 0
file_name = os.path.join(data_folder, f.path + extension)
@@ -74,25 +81,29 @@ def get_file_names_and_labels(files, data_folder, extension=".hdf5", hldi_type="
with h5py.File(file_name, "r") as f_h5py:
file_keys = list(f_h5py.keys())
# removes the 'FrameIndexes' key
file_keys = [f for f in file_keys if f != "FrameIndexes"]
n_frames = len(f_h5py["data"]) # shape[0]
# elements of tuples in the below list are as follows:
# a filename a key is extracted from,
# a label corresponding to the file,
# a key defining a frame from the file.
file_names_labels_keys = file_names_labels_keys + [
(file_name, label, key)
for file_name, label, key in zip(
[file_name] * len(file_keys),
[label] * len(file_keys),
file_keys,
)
]
file_names_labels_indices.extend(
[
(file_name, label, index)
for file_name, label, index in zip(
[file_name] * n_frames,
[label] * n_frames,
range(n_frames),
)
]
)
return file_names_labels_keys
else:
if not allow_missing_files:
raise ValueError(file_name + " is not a file")
print("Missing file: " + file_name)
return file_names_labels_indices
# ==============================================================================
@@ -121,7 +132,7 @@ class DataFolder(data.Dataset):
data_folder : str
A directory containing the training data. Note, that the training data
must be stored as a FrameContainers written to the hdf5 files. Other
must be stored as a VideoLikeContainer written to the hdf5 files. Other
formats are currently not supported.
transform : object
@@ -164,13 +175,14 @@ class DataFolder(data.Dataset):
self,
data_folder,
transform=None,
extension=".hdf5",
extension=".h5",
bob_hldi_instance=None,
hldi_type="pad",
groups=["train", "dev", "eval"],
protocol="grandtest",
purposes=["real", "attack"],
allow_missing_files=True,
custom_func=None,
**kwargs
):
"""
@@ -225,6 +237,7 @@ class DataFolder(data.Dataset):
self.protocol = protocol
self.purposes = purposes
self.allow_missing_files = allow_missing_files
self.custom_func = custom_func
if bob_hldi_instance is not None:
@@ -235,25 +248,26 @@ class DataFolder(data.Dataset):
**kwargs
)
file_names_labels_keys = get_file_names_and_labels(
file_names_labels_indices = get_file_names_and_labels(
files=files,
data_folder=self.data_folder,
extension=self.extension,
hldi_type=self.hldi_type,
allow_missing_files=self.allow_missing_files,
)
if self.allow_missing_files: # return only existing files
file_names_labels_keys = [
f for f in file_names_labels_keys if os.path.isfile(f[0])
file_names_labels_indices = [
f for f in file_names_labels_indices if os.path.isfile(f[0])
]
else:
# TODO - add behaviour similar to image folder
file_names_labels_keys = []
file_names_labels_indices = []
self.file_names_labels_keys = file_names_labels_keys
self.file_names_labels_indices = file_names_labels_indices
# ==========================================================================
def __getitem__(self, index):
@@ -278,12 +292,11 @@ class DataFolder(data.Dataset):
Index of the class.
"""
path, target, key = self.file_names_labels_keys[index]
path, target, frame_index = self.file_names_labels_indices[index]
with h5py.File(path, "r") as f_h5py:
img_array = np.array(
f_h5py.get(key + "/array")
f_h5py["data"][frame_index]
) # The size now is (3 x W x H)
if isinstance(
@@ -295,14 +308,15 @@ class DataFolder(data.Dataset):
img_array_tr = np.swapaxes(img_array, 1, 2)
img_array_tr = np.swapaxes(img_array_tr, 0, 2)
np_img = img_array_tr.copy() # np_img is numpy.ndarray of shape HxWxC
np_img = (
img_array_tr.copy()
) # np_img is numpy.ndarray of shape HxWxC
else: # for gray-scale images
np_img = np.expand_dims(
img_array_tr, 2
img_array, 2
) # np_img is numpy.ndarray of size HxWx1
if self.transform is not None:
np_img = self.transform(
@@ -316,6 +330,11 @@ class DataFolder(data.Dataset):
return img_array_transformed, target
# NOTE: make sure ``img_array_transformed`` is converted to a Tensor in your custom ``transform`` function.
if (
self.custom_func is not None
): # custom function to change the return to something else
return self.custom_func(np_img, target)
return np_img, target
# ==========================================================================
@@ -327,4 +346,5 @@ class DataFolder(data.Dataset):
len : int
The length of the file list.
"""
return len(self.file_names_labels_keys)
return len(self.file_names_labels_indices)
Loading