Commit c478e2dd authored by Tiago de Freitas Pereira

Refactor

parent 8e329713
Pipeline #57320 failed with stages in 2 minutes and 59 seconds
@@ -4,7 +4,7 @@
from torch.utils.data import Dataset
from bob.bio.face.database import MEDSDatabase, MorphDatabase
from bob.bio.face.database import MEDSDatabase, MorphDatabase, RFWDatabase
import torchvision.transforms as transforms
@@ -30,6 +30,7 @@ class DemographicTorchDataset(Dataset):
return len(self.bucket)
def __getitem__(self, idx):
sample = self.bucket[idx]
image = sample.data if self.transform is None else self.transform(sample.data)
@@ -136,3 +137,32 @@ class MorphTorchDataset(DemographicTorchDataset):
demographic_key = f"{sample.rac}-{sample.sex}"
return self.demographic_keys[demographic_key]
class RFWTorchDataset(DemographicTorchDataset):
def __init__(
self, protocol, database_path, database_extension=".h5", transform=None
):
bob_dataset = RFWDatabase(
protocol=protocol,
dataset_original_directory=database_path,
dataset_original_extension=database_extension,
)
super().__init__(bob_dataset, transform=transform)
def load_demographics(self):
target_metadata = "race"
metadata_keys = set(
[getattr(sset, target_metadata) for sset in self.bob_dataset.zprobes()]
+ [
getattr(sset, target_metadata)
for sset in self.bob_dataset.treferences()
]
)
metadata_keys = dict(zip(metadata_keys, range(len(metadata_keys))))
return metadata_keys
def get_demographics(self, sample):
demographic_key = getattr(sample, "race")
return self.demographic_keys[demographic_key]
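# A minimal usage sketch (hypothetical path; the integer ids are illustrative,
# since load_demographics() builds them from an unordered set of the race
# labels found in zprobes() and treferences(), e.g. {"African": 0, "Asian": 1,
# "Caucasian": 2, "Indian": 3}):
#
#     dataset = RFWTorchDataset(
#         protocol="original",
#         database_path="/path/to/rfw/samplewrapper",
#     )
#     item = dataset[0]  # dict with "data", "label" and "demography" entries,
#                        # as consumed by the trainers below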
@@ -63,6 +63,7 @@ def morph_experiment(backbone, checkpoint_path, experiment_path, dask_client, **
database = MorphDatabase(protocol=protocol)
groups = ["dev", "eval"]
# groups = ["eval"]
execute_vanilla_biometrics(
pipeline,
database,
......
from bob.bio.demographics.datasets import MedsTorchDataset
# https://pytorch.org/docs/stable/data.html
from torch.utils.data import DataLoader
from bob.extension import rc
import os
import bob.io.image
import torch
from functools import partial
import torchvision.transforms as transforms
import click
import yaml
from bob.bio.demographics.regularizers.trainers import balance_trainer
@click.command()
@click.argument("OUTPUT_DIR")
@click.option("--max-epochs", default=600, help="Max number of epochs")
@click.option("--batch-size", default=64, help="Batch size")
@click.option("--backbone", default="iresnet100", help="Backbone")
def balance_meds(
output_dir, max_epochs, batch_size, backbone,
):
from bob.bio.demographics.regularizers import AVAILABLE_BACKBONES
database_path = os.path.join(
rc.get("bob.bio.demographics.directory"), "meds", "samplewrapper"
)
transform = transforms.Compose(
[
lambda x: bob.io.image.to_matplotlib(x.astype("float32")),
# transforms.ToPILImage(mode="RGB"),
# transforms.RandomHorizontalFlip(p=0.5),
# transforms.RandomRotation(degrees=(-3, 3)),
# transforms.RandomAutocontrast(p=0.1),
transforms.ToTensor(),
lambda x: (x - 127.5) / 128.0,
]
)
dataset = MedsTorchDataset(
protocol="verification_fold1", database_path=database_path, transform=transform,
)
train_dataloader = DataLoader(
dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2
)
# train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
backbone_model = AVAILABLE_BACKBONES[backbone]["prior"]()
balance_trainer(
output_dir, max_epochs, batch_size, train_dataloader, backbone_model, transform,
)
if __name__ == "__main__":
balance_meds()
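# Example invocation, assuming a hypothetical output path (defaults shown
# explicitly):
#
#     python meds.py /path/to/output --backbone iresnet100 --max-epochs 600 --batch-size 64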
from bob.bio.demographics.datasets import MorphTorchDataset
# https://pytorch.org/docs/stable/data.html
from torch.utils.data import DataLoader
from bob.extension import rc
import os
import bob.io.image
import torch
from functools import partial
import torchvision.transforms as transforms
import click
import yaml
from bob.bio.demographics.regularizers.trainers import balance_trainer
@click.command()
@click.argument("OUTPUT_DIR")
@click.option("--max-epochs", default=600, help="Max number of epochs")
@click.option("--batch-size", default=64, help="Batch size")
@click.option("--backbone", default="iresnet100", help="Backbone")
def balance_morph(
output_dir, max_epochs, batch_size, backbone,
):
from bob.bio.demographics.regularizers import AVAILABLE_BACKBONES
database_path = os.path.join(
rc.get("bob.bio.demographics.directory"), "morph", "samplewrapper"
)
transform = transforms.Compose(
[
lambda x: bob.io.image.to_matplotlib(x.astype("float32")),
# transforms.ToPILImage(mode="RGB"),
# transforms.RandomHorizontalFlip(p=0.5),
# transforms.RandomRotation(degrees=(-3, 3)),
# transforms.RandomAutocontrast(p=0.1),
transforms.ToTensor(),
lambda x: (x - 127.5) / 128.0,
]
)
dataset = MorphTorchDataset(
protocol="verification_fold1", database_path=database_path, transform=transform,
)
train_dataloader = DataLoader(
dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2
)
# train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
backbone_model = AVAILABLE_BACKBONES[backbone]["prior"]()
balance_trainer(
output_dir, max_epochs, batch_size, train_dataloader, backbone_model, transform,
)
if __name__ == "__main__":
balance_morph()
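# This script mirrors balance_meds above; the only differences are the
# dataset class (MorphTorchDataset) and the "morph" samplewrapper path.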
@@ -289,12 +289,18 @@ def plot_fdr(
for neg, pos, t in zip(negatives, positives, taus)
]
fig, ax = plt.subplots(figsize=(16, 8))
fig.suptitle(title)
# fig, ax = plt.subplots(figsize=(16, 8))
fig, ax = plt.subplots(figsize=(8, 6))
# fig.suptitle(title)
[plt.semilogx(fmr_thresholds, f, label=l) for f, l in zip(fdrs, labels)]
[
plt.semilogx(fmr_thresholds, f, label=l, linewidth=2)
for f, l in zip(fdrs, labels)
]
[plt.scatter(fmr_thresholds, f) for f in fdrs]
# print(fdrs)  # debug output, disabled
plt.ylabel("$FDR(\\tau)$", fontsize=18)
plt.xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18)
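# For reference, a standalone sketch of the Fairness Discrepancy Rate curve
# plotted above (following de Freitas Pereira and Marcel's FDR definition);
# fmrs/fnmrs are hypothetical per-demographic error rates at one threshold:
def fdr_sketch(fmrs, fnmrs, alpha=0.5):
    a = max(abs(x - y) for x in fmrs for y in fmrs)  # widest FMR gap across demographics
    b = max(abs(x - y) for x in fnmrs for y in fnmrs)  # widest FNMR gap across demographics
    return 1.0 - (alpha * a + (1.0 - alpha) * b)  # 1.0 == perfectly fair at this threshold
# e.g. fdr_sketch([0.001, 0.01], [0.02, 0.05]) ~= 0.98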
......
@@ -19,7 +19,9 @@ def facecrop_pipeline(database, preprocessor, output_dir, dask_client):
# )
background_model_samples = database.background_model_samples()
pipeline.transform(background_model_samples).compute(scheduler=dask_client)
if len(background_model_samples) > 0:
pipeline.transform(background_model_samples).compute(scheduler=dask_client)
if hasattr(database, "zprobes"):
pipeline.transform(database.zprobes()).compute(scheduler=dask_client)
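# My reading of this refactor: RFW provides no background-model samples and
# not every database implements zprobes(), so both guards keep the face-crop
# pipeline from calling compute() on empty sample sets.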
......
from bob.learn.pytorch.trainers import BackboneHeadModel
from torch.nn import Module, Linear
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
import numpy as np
import torch.nn as nn
import copy
import math
class SimpleBalanceModel(BackboneHeadModel):
"""
Trainer that trains using a balanced dataset
"""
def __init__(
self,
backbone,
identity_head,
loss_fn,
optimizer_fn,
backbone_checkpoint_path=None,
max_epochs=500,
**kwargs,
):
# super(pl.LightningModule, self).__init__(**kwargs)
pl.LightningModule.__init__(self, **kwargs)
self.backbone = backbone
self.identity_head = identity_head
self.loss_fn = loss_fn
self.optimizer_fn = optimizer_fn
self.max_epochs = max_epochs
self.backbone_checkpoint_path = backbone_checkpoint_path
# self.last_op = None
def training_epoch_end(self, training_step_outputs):
# Persist the backbone weights at the end of every epoch
if self.backbone_checkpoint_path:
state = self.backbone.state_dict()
torch.save(state, self.backbone_checkpoint_path)
def training_step(self, batch, batch_idx):
data = batch["data"]
label = batch["label"]
# demography = batch["demography"]
embedding = self.backbone(data)
# Identity loss
logits_identity = self.identity_head(embedding, label)
loss_identity = self.loss_fn(logits_identity, label)
self.log("train/loss_identity", loss_identity)
acc = (
sum(
np.argmax(logits_identity.cpu().detach().numpy(), axis=1)
== label.cpu().detach().numpy()
)
/ label.shape[0]
)
self.log("train/acc_identity", acc)
self.log("train/total_loss", loss_identity)
return loss_identity
def configure_optimizers(self):
# optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
return self.optimizer_fn(params=self.parameters())
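# Side note: the accuracy above round-trips through numpy; an equivalent
# on-device sketch (not the committed code) would be:
#
#     acc = (logits_identity.argmax(dim=1) == label).float().mean()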
@@ -12,6 +12,9 @@ from bob.bio.demographics.regularizers.independence import (
OrthogonalityModel,
MINEModel,
)
from bob.bio.demographics.regularizers.balance import SimpleBalanceModel
import pytorch_lightning as pl
import torch
from functools import partial
@@ -237,3 +240,84 @@ def mine_trainer(
trainer.fit(
model=model, train_dataloaders=train_dataloader,
)
def balance_trainer(
output_dir, max_epochs, batch_size, train_dataloader, backbone_model, transform,
):
"""
Trains a Pytorch CNN using the balance hypothesis
"""
# Define the experiment variables up front so we don't lose track of them
os.makedirs(output_dir, exist_ok=True)
with open(f"{output_dir}/config.yml", "w") as file:
dict_file = dict()
dict_file["max_epochs"] = max_epochs
dict_file["batch_size"] = batch_size
dict_file["hypothesis"] = "Balance"
yaml.dump(dict_file, file)
backbone_checkpoint_path = f"{output_dir}/model.pth"
checkpoint_dir = f"{output_dir}/last.ckpt"
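# Note: despite the name, checkpoint_dir points at the last.ckpt file that
# ModelCheckpoint(save_last=True) below writes; its presence on disk is what
# makes a restarted run resume instead of starting over.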
#####################
## IDENTITY
num_class = len(train_dataloader.dataset.labels)
identity_head = ArcFace(
feat_dim=backbone_model.features.num_features, num_class=num_class
)
optimizer = partial(torch.optim.SGD, lr=0.001, momentum=0.9)
model = SimpleBalanceModel(
backbone=backbone_model,
identity_head=identity_head,
loss_fn=torch.nn.CrossEntropyLoss(),
optimizer_fn=optimizer,
backbone_checkpoint_path=backbone_checkpoint_path,
max_epochs=max_epochs,
)
"""
from bob.bio.face.pytorch.callbacks import VanillaBiometricsCallback
vanilla_callback = VanillaBiometricsCallback(
config="/remote/idiap.svm/user.active/tpereira/gitlab/bob/bob.nightlies/src/bob.bio.demographics/bob/bio/demographics/fair_transformers/transformers.py",
output_path="./vanilla-callback",
)
"""
model_checkpoint = ModelCheckpoint(
output_dir, every_n_train_steps=100, save_last=True
)
logger = TensorBoardLogger(os.path.join(output_dir, "tb_logs"))
# Be careful with
# https://github.com/PyTorchLightning/pytorch-lightning/issues/5325
resume_from_checkpoint = checkpoint_dir if os.path.exists(checkpoint_dir) else None
# NOTE: I am also using this code to learn, so please bear with the comments
# callbacks=[model_checkpoint, vanilla_callback],
callbacks = [model_checkpoint]
trainer = pl.Trainer(
callbacks=callbacks,
logger=logger,
max_epochs=max_epochs,
gpus=-1 if torch.cuda.is_available() else None,
resume_from_checkpoint=resume_from_checkpoint,
# resume_from_checkpoint=resume_from_checkpoint, #https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#resume-from-checkpoint
# debug flags
# limit_train_batches=10, # https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#limit-train-batches
# limit_val_batches=1,
amp_level="00", # https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html#amp-level
log_every_n_steps=5,
)
trainer.fit(
model=model, train_dataloaders=train_dataloader,
)
@@ -4,17 +4,22 @@ from bob.bio.face.preprocessor import FaceCrop
def face_crop():
from bob.bio.face.database import MEDSDatabase
output_dir = "/idiap/temp/tpereira/3.FaceCrops/meds/"
protocol = "verification_fold1"
database = MEDSDatabase(protocol=protocol)
# from bob.bio.face.database import MEDSDatabase
# output_dir = "/idiap/temp/tpereira/3.FaceCrops/meds/"
# protocol = "verification_fold1"
# database = MEDSDatabase(protocol=protocol)
# from bob.bio.face.database import MorphDatabase
# output_dir = "/idiap/temp/tpereira/3.FaceCrops/morph/"
# protocol = "verification_fold1"
# database = MorphDatabase(protocol=protocol)
from bob.bio.face.database import RFWDatabase
output_dir = "/idiap/temp/tpereira/3.FaceCrops/rfw/"
protocol = "verification_fold1"
database = RFWDatabase(protocol="original")
cropped_image_size = (112, 112)
cropped_positions = {
"leye": (55, 72),
@@ -28,7 +33,7 @@ def face_crop():
color_channel=color_channel,
)
# dask_client = "single-threaded"
dask_client = "single-threaded"
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
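# Two execution modes, as I read this config: dask_client = "single-threaded"
# runs the whole pipeline in-process (useful for debugging), while the
# SGEMultipleQueuesCluster imported below distributes it over the SGE grid.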
......
from bob.bio.demographics.datasets import MedsTorchDataset, MorphTorchDataset
from bob.bio.demographics.datasets import (
MedsTorchDataset,
MorphTorchDataset,
RFWTorchDataset,
)
import time
# https://pytorch.org/docs/stable/data.html
@@ -44,6 +48,26 @@ def test_morph():
protocol="verification_fold1", database_path=database_path,
)
# dataloader = DataLoader(
# dataset, batch_size=64, shuffle=True, pin_memory=True, num_workers=2
# )
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
batch = next(iter(dataloader))
batch["data"].shape == (64, 3, 112, 112)
@pytest.mark.skipif(
rc.get("bob.bio.demographics.directory") is None,
reason="Demographics features directory not available. Please do `bob config set bob.bio.demographics.directory [PATH]` to set the base features path.",
)
def test_rfw():
database_path = os.path.join(
rc.get("bob.bio.demographics.directory"), "rfw", "samplewrapper"
)
dataset = RFWTorchDataset(protocol="original", database_path=database_path,)
# dataloader = DataLoader(
# dataset, batch_size=64, shuffle=True, pin_memory=True, num_workers=2
# )
......
@@ -12,11 +12,23 @@ import click
default=rc.get("bob.bio.demographics.path"),
help=f'Output path. Default to: {rc.get("bob.bio.demographics.path")}',
)
@click.option(
"--pipeline",
"-p",
entry_point_group="bob.bio.pipeline",
required=True,
help="Vanilla biometrics pipeline composed of a scikit-learn Pipeline and a BioAlgorithm",
cls=ResourceOption,
)
@click.option("--sge", is_flag=True)
def run(output_path, sge):
def run(output_path, pipeline, sge):
output_path = "./results" if output_path is None else output_path
# import ipdb; ipdb.set_trace()  # leftover debug breakpoint, disabled
# DATABASE
from bob.bio.face.database import MobioDatabase
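# The --pipeline/-p option accepts any resource registered under the
# "bob.bio.pipeline" entry point (a scikit-learn Pipeline plus a
# BioAlgorithm); --sge toggles grid execution.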
......
BASE_PATH=/remote/idiap.svm/user.active/tpereira/gitlab/bob/bob.nightlies
#$BASE_PATH/bin/bob bio pipelines vanilla-biometrics $BASE_PATH/src/bob.bio.demographics/bob/bio/demographics/experiments/evaluation/ortogonality_hypothesis/meds.py --output /idiap/temp/tpereira/2.FRDemographics/regularization/experiments/orthogonality/iresnet100_identity_factor1_orthogonality_factor1 --dask-partition-size 50 -c -g dev -g eval -l sge
#/idiap/temp/tpereira/2.FRDemographics/regularization/experiments/orthogonality/meds/analysis
OUTPUT_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/experiments/balance/meds/
MODELS_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/models/balance/meds/
BACKBONE=iresnet100
array=( "balance")
for CNN in "${array[@]}"
do
./bin/python $BASE_PATH/src/bob.bio.demographics/bob/bio/demographics/experiments/evaluation/ortogonality/meds.py $BACKBONE $MODELS_PATH/$BACKBONE/$CNN/model.pth $OUTPUT_PATH/$BACKBONE/$CNN/ -l sge
done
# -l sge
BASE_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/models/balance/meds/
BACKBONE=iresnet100
COMMAND=./src/bob.bio.demographics/bob/bio/demographics/experiments/train/balance/meds.py
./bin/jman submit --name BALANCE -q sgpu -- ./bin/python $COMMAND $BASE_PATH/$BACKBONE/balance \
--backbone $BACKBONE \
--max-epochs 200
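# jman is the gridtk job manager: "-q sgpu" requests a GPU queue, and
# everything after "--" is the command line executed on the grid node.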
BASE_PATH=/remote/idiap.svm/user.active/tpereira/gitlab/bob/bob.nightlies
#$BASE_PATH/bin/bob bio pipelines vanilla-biometrics $BASE_PATH/src/bob.bio.demographics/bob/bio/demographics/experiments/evaluation/ortogonality_hypothesis/meds.py --output /idiap/temp/tpereira/2.FRDemographics/regularization/experiments/orthogonality/iresnet100_identity_factor1_orthogonality_factor1 --dask-partition-size 50 -c -g dev -g eval -l sge
#/idiap/temp/tpereira/2.FRDemographics/regularization/experiments/orthogonality/meds/analysis
OUTPUT_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/experiments/balance/morph/
MODELS_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/models/balance/morph/
BACKBONE=iresnet100
array=( "balance")
for CNN in "${array[@]}"
do
./bin/python $BASE_PATH/src/bob.bio.demographics/bob/bio/demographics/experiments/evaluation/ortogonality/morph.py $BACKBONE $MODELS_PATH/$BACKBONE/$CNN/model.pth $OUTPUT_PATH/$BACKBONE/$CNN/
#-l sge-gpu
done
# -l sge
BASE_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/models/balance/morph/
BACKBONE=iresnet100
COMMAND=./src/bob.bio.demographics/bob/bio/demographics/experiments/train/balance/morph.py
./bin/jman submit --name BALANCE -q sgpu -- ./bin/python $COMMAND $BASE_PATH/$BACKBONE/balance \
--backbone $BACKBONE \
--max-epochs 25
@@ -25,6 +25,7 @@ database_name = "meds"
# Baselines
from bob.bio.face.embeddings.tensorflow import (
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
@@ -37,25 +38,32 @@ from bob.bio.face.embeddings.pytorch import (
iresnet50,
)
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
from bob.bio.face.embeddings.o