Added the replay-mobile example to "How to port old bob database interface to the new csv format", authored by Yannick DAYER
...@@ -103,5 +103,125 @@ if __name__ == "__main__": ...@@ -103,5 +103,125 @@ if __name__ == "__main__":
convert_database() convert_database()
``` ```
## Adding custom metadata to the CSV files
The following custom script creates the CSV protocol definition files for the _replay-mobile_ image dataset from a bob 8 database. This dataset needs some extra metadata fields, which are added as columns in the CSV files:
- `should_flip` indicates if the data in the sample's image needs to be mirrored horizontally;
- `attack_type` indicates if a probe is an attack, and what type of attack is applied.
These fields are constructed from the filename of each sample and the type of file (model or probe) in the `add_metadata` function.
This script has to be run in a bob 8 environment.
```python
from bob.bio.face.database.replaymobile import ReplayMobileBioDatabase
import os
import tarfile
from csv import DictWriter
# Exceptions to the flip rule: some 'tablet' files need flipping.
# Each entry is compared (as a string) against the first '_'-separated token
# of a file's `id` when `should_flip` is computed.
FORCE_FLIP_IDS = ["26", "27"]
def convert_replaymobile_to_csv():
    """Export the legacy replay-mobile protocols as CSV protocol definition files.

    For every protocol of the legacy database (the ``-licit`` / ``-spoof``
    variants are merged under one base name), the following files are written
    below ``replaymobile-img/<protocol>/``:

    - ``dev/for_models.csv`` and ``dev/for_probes.csv`` (always);
    - ``eval/for_models.csv`` and ``eval/for_probes.csv`` (only when the eval
      enroll, licit-probe and spoof-probe lists are all non-empty);
    - ``norm/train_world.csv`` (only when the "world" list is non-empty).

    Probe files carry an additional ``ATTACK_TYPE`` column. Finally the whole
    ``replaymobile-img`` folder is packed into
    ``bio-face-replaymobile-img.tar.gz`` in the current working directory.
    """
    database = ReplayMobileBioDatabase()

    def add_metadata(list_of_files, attack=False):
        """Add the metadata fields to each file of the list (in place).

        ``attack`` selects which filename layout is used to locate the
        reference id, and marks the files as spoofing attacks.
        """
        for f in list_of_files:
            # The last three characters of the path are parsed as the frame
            # number; they are then removed together with the one character
            # preceding them.
            f.frame = int(f.path[-3:])
            f.path = f.path[:-4]
            split_path = f.path.split("_")
            if attack:
                # Attack files have a different filename format
                f.reference_id = int(split_path[-7][-3:])
                f.purpose = "attack"
                f.attack_type = "spoof"
            else:
                # Genuine files have one filename format
                f.reference_id = int(split_path[-5][-3:])
                f.purpose = split_path[-3]
            # NOTE(review): the original attack branch also assigned
            # `f.capturing_device = split_path[-3]`, but that value was always
            # overwritten by the assignment below. The effective behaviour is
            # kept here; confirm which index is right for attack filenames.
            f.capturing_device = split_path[-2]
            f.should_flip = (
                f.capturing_device == "mobile"
                or f.id.split("_")[0] in FORCE_FLIP_IDS
            )

    def write_to_csv(path, filelist, header, fields):
        """Write one CSV row per file of ``filelist`` to ``path``.

        ``header`` holds the column names and ``fields`` the matching
        attribute names read from each file; a missing attribute is passed to
        the writer as ``None``.
        """
        # newline="" is required by the csv module so that the writer's own
        # line terminator is not translated a second time by the file object.
        with open(path, "w", newline="") as csv_file:
            csv_writer = DictWriter(csv_file, delimiter=",", fieldnames=header)
            csv_writer.writeheader()
            csv_writer.writerows(
                {
                    column: getattr(sample, attribute, None)
                    for column, attribute in zip(header, fields)
                }
                for sample in filelist
            )

    # Columns in the csv (header)
    csv_fields = ["PATH", "REFERENCE_ID", "ID", "FRAME", "PURPOSE", "SHOULD_FLIP"]
    # Corresponding fields in the File objects
    file_attr = ["path", "reference_id", "id", "frame", "purpose", "should_flip"]
    # Probe headers have some special metadata/columns
    csv_fields_probes = csv_fields + ["ATTACK_TYPE"]
    file_attr_probes = file_attr + ["attack_type"]

    # replaymobile-img has every protocol duplicated ('X-licit' and 'X-spoof');
    # sorting makes the processing order deterministic.
    all_protocols = sorted(
        {
            name.replace("-licit", "").replace("-spoof", "")
            for name in database._db.protocol_names()
        }
    )

    for protocol in all_protocols:
        licit = protocol + "-licit"
        spoof = protocol + "-spoof"

        # Retrieve the file lists from the legacy db
        train_files = database.objects(groups=["world"], protocol=licit, purposes=["enroll"])
        dev_enroll = database.objects(groups=["dev"], protocol=licit, purposes=["enroll"])
        dev_probe_licit = database.objects(groups=["dev"], protocol=licit, purposes=["probe"])
        dev_probe_spoof = database.objects(groups=["dev"], protocol=spoof, purposes=["probe"])
        eval_enroll = database.objects(groups=["eval"], protocol=licit, purposes=["enroll"])
        eval_probe_licit = database.objects(groups=["eval"], protocol=licit, purposes=["probe"])
        eval_probe_spoof = database.objects(groups=["eval"], protocol=spoof, purposes=["probe"])

        # The eval and train groups are optional: skip them when empty
        has_eval = all((eval_enroll, eval_probe_licit, eval_probe_spoof))
        has_train = bool(train_files)

        # Add the metadata to each file in each list
        add_metadata(dev_enroll)
        add_metadata(dev_probe_licit)
        add_metadata(dev_probe_spoof, True)
        if has_eval:
            add_metadata(eval_enroll)
            add_metadata(eval_probe_licit)
            add_metadata(eval_probe_spoof, True)
        if has_train:
            add_metadata(train_files)

        # Create the folder structure
        protocol_path = os.path.join("replaymobile-img", protocol)
        dev_path = os.path.join(protocol_path, "dev")
        os.makedirs(dev_path, exist_ok=True)
        if has_eval:
            eval_path = os.path.join(protocol_path, "eval")
            os.makedirs(eval_path, exist_ok=True)
        if has_train:
            train_path = os.path.join(protocol_path, "norm")
            os.makedirs(train_path, exist_ok=True)

        # Writing the CSV files
        write_to_csv(os.path.join(dev_path, "for_models.csv"), dev_enroll, csv_fields, file_attr)
        write_to_csv(
            os.path.join(dev_path, "for_probes.csv"),
            dev_probe_licit + dev_probe_spoof,
            csv_fields_probes,
            file_attr_probes,
        )
        if has_eval:
            write_to_csv(os.path.join(eval_path, "for_models.csv"), eval_enroll, csv_fields, file_attr)
            write_to_csv(
                os.path.join(eval_path, "for_probes.csv"),
                eval_probe_licit + eval_probe_spoof,
                csv_fields_probes,
                file_attr_probes,
            )
        if has_train:
            write_to_csv(os.path.join(train_path, "train_world.csv"), train_files, csv_fields, file_attr)

    # Create the final tarball
    path = "bio-face-replaymobile-img.tar.gz"
    with tarfile.open(path, "w:gz") as tar:
        tar.add("replaymobile-img", arcname=".")
    print(f"Created '{path}'.")
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    convert_replaymobile_to_csv()
```
# Documentation
Document the protocols, data format, metadata
\ No newline at end of file