... | ... | @@ -103,5 +103,125 @@ if __name__ == "__main__": |
|
|
convert_database()
|
|
|
```
|
|
|
|
|
|
## Adding custom metadata to the CSV files
|
|
|
|
|
|
The following custom script creates the CSV protocol definition files for the _replay-mobile_ image dataset from a Bob 8 database. This dataset requires additional metadata fields, which are added as extra columns in the CSV files:
|
|
|
|
|
|
- `should_flip` indicates if the data in the sample's image needs to be mirrored horizontally;
|
|
|
- `attack_type` indicates if a probe is an attack, and what type of attack is applied.
|
|
|
|
|
|
These fields are built in the `add_metadata` function from the filename of each sample and from whether the sample is genuine (models and licit probes) or an attack (spoof probes).
|
|
|
|
|
|
This script has to be run in a Bob 8 environment.
|
|
|
|
|
|
```python
|
|
|
from bob.bio.face.database.replaymobile import ReplayMobileBioDatabase
|
|
|
import os
|
|
|
import tarfile
|
|
|
from csv import DictWriter
|
|
|
|
|
|
# Exceptions to the flip rule: some 'tablet' files also need flipping.
|
|
|
# Compared (as strings) with the part of each file's `id` that precedes its first '_'.
FORCE_FLIP_IDS = ["26", "27"]
|
|
|
|
|
|
def convert_replaymobile_to_csv():
    """Export the replaymobile-img protocols of the legacy database as CSV files.

    The '-licit' and '-spoof' variants of each protocol are merged into a
    single protocol. For each of them, the file lists are queried from
    ``ReplayMobileBioDatabase``, extended with metadata parsed from the file
    names, and written under ``replaymobile-img/<protocol>/`` in the ``dev``,
    ``eval`` (only if all its lists are non-empty) and ``norm`` (only if
    training files exist) sub-folders. The ``replaymobile-img`` folder is
    finally packed into ``bio-face-replaymobile-img.tar.gz`` in the current
    working directory, and the name of the archive is printed.
    """

    def add_metadata(list_of_files, attack=False):
        """Adds metadata fields to each file in the list (in place).

        The last three characters of ``path`` are parsed as the frame number
        and the last four characters are then stripped from ``path``. The
        remaining fields are read from the '_'-separated tokens of the
        stripped path; ``attack`` selects which token layout is used.
        """
        for f in list_of_files:
            f.frame = int(f.path[-3:])
            f.path = f.path[:-4]
            split_path = f.path.split("_")
            if not attack:  # Genuine files have one filename format
                f.reference_id = int(split_path[-5][-3:])
                f.purpose = split_path[-3]
            else:  # Attack files have a different filename format
                f.reference_id = int(split_path[-7][-3:])
                # NOTE(review): this value is overwritten just below by
                # split_path[-2] -- confirm which token really holds the
                # capturing device for attack files.
                f.capturing_device = split_path[-3]
                f.purpose = "attack"
                f.attack_type = "spoof"
            f.capturing_device = split_path[-2]
            f.should_flip = (
                f.capturing_device == "mobile"
                or f.id.split("_")[0] in FORCE_FLIP_IDS
            )

    def write_to_csv(path, filelist, header, fields):
        """Writes one CSV row per file; ``fields[i]`` is the attribute stored in column ``header[i]``.

        Attributes that a file does not have are written as empty cells.
        """
        # newline="" lets the csv module control line endings itself, as its
        # documentation requires; otherwise rows can end in "\r\r\n" on
        # platforms that translate "\n" on write.
        with open(path, "w", newline="") as f:
            csv_writer = DictWriter(f, delimiter=",", fieldnames=header)
            csv_writer.writeheader()
            csv_writer.writerows(
                {
                    column: getattr(sample, attribute, None)
                    for column, attribute in zip(header, fields)
                }
                for sample in filelist
            )

    # Columns in the csv (header)
    csv_fields = ["PATH", "REFERENCE_ID", "ID", "FRAME", "PURPOSE", "SHOULD_FLIP"]
    # Corresponding fields in the File objects
    file_attr = ["path", "reference_id", "id", "frame", "purpose", "should_flip"]

    # Probe files have one extra metadata column
    csv_fields_probes = csv_fields + ["ATTACK_TYPE"]
    file_attr_probes = file_attr + ["attack_type"]

    database = ReplayMobileBioDatabase()

    # replaymobile-img has every protocol duplicated ('X-licit' and 'X-spoof');
    # sorting makes the processing order reproducible between runs.
    all_protocols = sorted(
        {
            name.replace("-licit", "").replace("-spoof", "")
            for name in database._db.protocol_names()
        }
    )

    for protocol in all_protocols:
        licit = protocol + "-licit"
        spoof = protocol + "-spoof"

        # Retrieve the file lists from the legacy db
        train_files = database.objects(groups=["world"], protocol=licit, purposes=["enroll"])
        dev_enroll = database.objects(groups=["dev"], protocol=licit, purposes=["enroll"])
        dev_probe_licit = database.objects(groups=["dev"], protocol=licit, purposes=["probe"])
        dev_probe_spoof = database.objects(groups=["dev"], protocol=spoof, purposes=["probe"])
        eval_enroll = database.objects(groups=["eval"], protocol=licit, purposes=["enroll"])
        eval_probe_licit = database.objects(groups=["eval"], protocol=licit, purposes=["probe"])
        eval_probe_spoof = database.objects(groups=["eval"], protocol=spoof, purposes=["probe"])

        # The eval group is exported only if none of its lists is empty, and
        # the training set only if it contains files.
        has_eval = all([eval_enroll, eval_probe_licit, eval_probe_spoof])
        has_train = bool(train_files)

        # Add the metadata to each file in each list
        add_metadata(dev_enroll)
        add_metadata(dev_probe_licit)
        add_metadata(dev_probe_spoof, True)
        if has_eval:
            add_metadata(eval_enroll)
            add_metadata(eval_probe_licit)
            add_metadata(eval_probe_spoof, True)
        if has_train:
            add_metadata(train_files)

        # Create the folder structure
        protocol_path = os.path.join("replaymobile-img", protocol)
        dev_path = os.path.join(protocol_path, "dev")
        os.makedirs(dev_path, exist_ok=True)
        if has_eval:
            eval_path = os.path.join(protocol_path, "eval")
            os.makedirs(eval_path, exist_ok=True)
        if has_train:
            train_path = os.path.join(protocol_path, "norm")
            os.makedirs(train_path, exist_ok=True)

        # Writing the CSV files
        write_to_csv(os.path.join(dev_path, "for_models.csv"), dev_enroll, csv_fields, file_attr)
        write_to_csv(
            os.path.join(dev_path, "for_probes.csv"),
            dev_probe_licit + dev_probe_spoof,
            csv_fields_probes,
            file_attr_probes,
        )
        if has_eval:
            write_to_csv(os.path.join(eval_path, "for_models.csv"), eval_enroll, csv_fields, file_attr)
            write_to_csv(
                os.path.join(eval_path, "for_probes.csv"),
                eval_probe_licit + eval_probe_spoof,
                csv_fields_probes,
                file_attr_probes,
            )
        if has_train:
            write_to_csv(os.path.join(train_path, "train_world.csv"), train_files, csv_fields, file_attr)

    # Create the final tarball
    path = "bio-face-replaymobile-img.tar.gz"
    with tarfile.open(path, "w:gz") as tar:
        # arcname="." stores the content of the folder at the root of the archive
        tar.add("replaymobile-img", arcname=".")

    print(f"Created '{path}'.")
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    convert_replaymobile_to_csv()
|
|
|
```
|
|
|
|
|
|
# Documentation
|
|
|
Document the protocols, data format, metadata |
|
|
\ No newline at end of file |