Commit 963f9456 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Several updates

parent 617a5159
Pipeline #58543 failed with stages
in 2 minutes and 26 seconds
......@@ -14,7 +14,6 @@ from bob.bio.face.pytorch.preprocessing import get_standard_data_augmentation
import os
BATCH_SIZE = 128
PROTOCOL = "mobile0-male-female"
DATABASE_PATH = os.path.join(
rc.get("bob.bio.demographics.directory"), "mobio", "samplewrapper"
......@@ -45,20 +44,6 @@ train_dataset = MobioTorchDataset(
# train=False,
# )
train_dataloader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
pin_memory=True,
num_workers=4,
)
# For some reason we have an issue with
validation_dataset = None
# validation_dataloader = DataLoader(
# validation_dataset,
# batch_size=BATCH_SIZE,
# shuffle=False,
# pin_memory=False,
# num_workers=1,
# )
......@@ -12,10 +12,6 @@ import torchvision.transforms as transforms
from bob.bio.face.pytorch.preprocessing import get_standard_data_augmentation
if locals().get("BATCH_SIZE") is None:
BATCH_SIZE = 128
PROTOCOL = "vgg2-full"
DATABASE_PATH = rc.get("bob.bio.face.vgg2-crops.directory")
DATABASE_EXTENSION = ".jpg"
......@@ -43,20 +39,4 @@ validation_dataset = VGG2TorchDataset(
train=False,
)
train_dataloader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
pin_memory=True,
num_workers=4,
)
validation_dataloader = DataLoader(
validation_dataset,
batch_size=BATCH_SIZE,
shuffle=False,
pin_memory=False,
num_workers=1,
)
# validation_dataloader = None
# validation_dataset = None
......@@ -12,7 +12,6 @@ import torchvision.transforms as transforms
from bob.bio.face.pytorch.preprocessing import get_standard_data_augmentation
BATCH_SIZE = 128
PROTOCOL = "vgg2-full"
DATABASE_PATH = rc.get("bob.bio.face.vgg2-crops.directory")
DATABASE_EXTENSION = ".jpg"
......@@ -41,22 +40,5 @@ train_dataset = VGG2TorchDataset(
# train=False,
# )
train_dataloader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
pin_memory=True,
num_workers=4,
)
validation_dataset = None
# For some reason we have an issue with
#validation_dataloader = None
# validation_dataloader = DataLoader(
# validation_dataset,
# batch_size=BATCH_SIZE,
# shuffle=False,
# pin_memory=False,
# num_workers=1,
# )
......@@ -12,10 +12,6 @@ import torchvision.transforms as transforms
from bob.bio.face.pytorch.preprocessing import get_standard_data_augmentation
if locals().get("BATCH_SIZE") is None:
BATCH_SIZE = 128
PROTOCOL = "vgg2-short"
DATABASE_PATH = rc.get("bob.bio.face.vgg2-crops.directory")
DATABASE_EXTENSION = ".jpg"
......@@ -43,20 +39,5 @@ validation_dataset = VGG2TorchDataset(
train=False,
)
train_dataloader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
pin_memory=True,
num_workers=4,
)
validation_dataloader = DataLoader(
validation_dataset,
batch_size=BATCH_SIZE,
shuffle=False,
pin_memory=False,
num_workers=1,
)
# validation_dataloader = None
......@@ -12,10 +12,6 @@ import torchvision.transforms as transforms
from bob.bio.face.pytorch.preprocessing import get_standard_data_augmentation
if locals().get("BATCH_SIZE") is None:
BATCH_SIZE = 128
PROTOCOL = "vgg2-short"
DATABASE_PATH = rc.get("bob.bio.face.vgg2-crops.directory")
DATABASE_EXTENSION = ".jpg"
......@@ -43,20 +39,5 @@ train_dataset = VGG2TorchDataset(
# train=False,
# )
train_dataloader = DataLoader(
train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
pin_memory=True,
num_workers=2,
)
# validation_dataloader = DataLoader(
# validation_dataset,
# batch_size=batch_size,
# shuffle=False,
# pin_memory=False,
# num_workers=1,
# )
validation_dataset = None
......@@ -3,51 +3,42 @@ BASE_PATH=/idiap/temp/tpereira/2.FRDemographics/regularization/models/contrastiv
#COMMAND=./bob/bio/demographics/script/train_regularization_level_fairness_commands.py
BACKBONE=iresnet34
#jman submit --name O.IRESNET34 -q gpu --sge-extra-args="-l hostname=vgni001|vgni00[1-9]|vgni01[0-9]|vgni02[0-9]|vgni03[0-2]" --
BACKBONE=iresnet50
#jman submit --name C.IRESNET50 -q gpu --sge-extra-args="-l hostname=vgni001|vgni00[1-9]|vgni01[0-9]|vgni02[0-9]|vgni03[0-2]" --
#./bin/bob bio demographics train-regularization-level-fairness train-contrastive \
# $BASE_PATH/$BACKBONE \
# --database ./bob/bio/demographics/config/train/databases/mobio/mobio-male-female.py \
# --identity-backbone ./bob/bio/demographics/config/train/backbones/$BACKBONE.py \
# --training-config ./bob/bio/demographics/config/train/training_configs/regular_sgd_crossentropy.py \
# --max-epochs 50 --batch-size 64 \
# --max-positive-pairs-per-subject 20 --negative-pairs-per-subject 20
./bin/bob bio demographics train-regularization-level-fairness train-contrastive \
$BASE_PATH/$BACKBONE \
BACKBONE=iresnet100
jman submit --name C.IRESNET100 -q gpu --sge-extra-args="-l hostname=vgni001|vgni00[1-9]|vgni01[0-9]|vgni02[0-9]|vgni03[0-2]" -- ./bin/bob bio demographics train-regularization-level-fairness train-contrastive \
$BASE_PATH/$BACKBONE \
--database ./bob/bio/demographics/config/train/databases/mobio/mobio-male-female.py \
--identity-backbone ./bob/bio/demographics/config/train/backbones/$BACKBONE.py \
--training-config ./bob/bio/demographics/config/train/training_configs/regular_sgd_crossentropy.py \
--max-epochs 30 --batch-size 64
--max-epochs 50 --batch-size 64 \
--max-positive-pairs-per-subject 20 --negative-pairs-per-subject 20
#BACKBONE=iresnet50
#jman submit --name O.IRESNET50 -q gpu --sge-extra-args="-l hostname=vgni001|vgni00[1-9]|vgni01[0-9]|vgni02[0-9]|vgni03[0-2]" -- ./bin/bob bio demographics train-regularization-level-fairness train-contrastive \
# $BASE_PATH/$BACKBONE \
# --database ./bob/bio/demographics/config/train/databases/vgg2/vgg2-full.py \
# --identity-backbone ./bob/bio/demographics/config/train/backbones/$BACKBONE.py \
# --demographic-backbone ./bob/bio/demographics/config/train/backbones/iresnet34.py \
# --head ./bob/bio/demographics/config/train/heads/arcface.py \
# --training-config ./bob/bio/demographics/config/train/training_configs/regular_sgd_crossentropy.py \
# --identity-factor 1 \
# --orthogonality-factor 1 \
# --demographic-epochs 20 \
# --identity-epochs 3 \
# --max-epochs 30
#BACKBONE=iresnet100
BACKBONE=iresnet34
#jman submit --name O.IRESNET100 -q gpu --sge-extra-args="-l hostname=vgni001|vgni00[1-9]|vgni01[0-9]|vgni02[0-9]|vgni03[0-2]" -- ./bin/bob bio demographics train-regularization-level-fairness train-contrastive \
#jman submit --name C.IRESNET34 -q gpu --sge-extra-args="-l hostname=vgni001|vgni00[1-9]|vgni01[0-9]|vgni02[0-9]|vgni03[0-2]" -- ./bin/bob bio demographics train-regularization-level-fairness train-contrastive \
# $BASE_PATH/$BACKBONE \
# --database ./bob/bio/demographics/config/train/databases/vgg2/vgg2-full.py \
# --database ./bob/bio/demographics/config/train/databases/mobio/mobio-male-female.py \
# --identity-backbone ./bob/bio/demographics/config/train/backbones/$BACKBONE.py \
# --demographic-backbone ./bob/bio/demographics/config/train/backbones/iresnet34.py \
# --head ./bob/bio/demographics/config/train/heads/arcface.py \
# --training-config ./bob/bio/demographics/config/train/training_configs/regular_sgd_crossentropy.py \
# --identity-factor 1 \
# --orthogonality-factor 1 \
# --demographic-epochs 20 \
# --identity-epochs 3 \
#--max-epochs 30
# --max-epochs 50 --batch-size 128 \
# --max-positive-pairs-per-subject 20 --negative-pairs-per-subject 20
from tokenize import group
from bob.pipelines import wrap
from sklearn.pipeline import make_pipeline
......@@ -12,7 +13,11 @@ def facecrop_pipeline(database, preprocessor, output_dir, dask_client):
transform_extra_arguments=transform_extra_arguments,
)
)
pipeline = wrap(["checkpoint", "dask"], pipeline, features_dir=output_dir,)
pipeline = wrap(
["checkpoint", "dask"],
pipeline,
features_dir=output_dir,
)
# pipeline = make_pipeline(
# wrap(["sample", "checkpoint", "dask"], preprocessor, features_dir=output_dir,)
......@@ -29,4 +34,16 @@ def facecrop_pipeline(database, preprocessor, output_dir, dask_client):
if hasattr(database, "treferences"):
pipeline.transform(database.treferences()).compute(scheduler=dask_client)
if hasattr(database, "references"):
pipeline.transform(database.references(group="dev")).compute(
scheduler=dask_client
)
pipeline.transform(database.references(group="eval")).compute(
scheduler=dask_client
)
if hasattr(database, "probes"):
pipeline.transform(database.probes(group="dev")).compute(scheduler=dask_client)
pipeline.transform(database.probes(group="eval")).compute(scheduler=dask_client)
pass
......@@ -20,7 +20,7 @@ class SimpleBalanceModel(BackboneHeadModel):
identity_head=None,
loss_fn=None,
optimizer_fn=None,
backbone_checkpoint_path=None,
backbone_checkpoint_file=None,
max_epochs=500,
**kwargs,
):
......@@ -34,17 +34,9 @@ class SimpleBalanceModel(BackboneHeadModel):
self.max_epochs = max_epochs
self.backbone_checkpoint_path = backbone_checkpoint_path
self.backbone_checkpoint_file = backbone_checkpoint_file
# self.last_op = None
def training_epoch_end(self, training_step_outputs):
if self.backbone_checkpoint_path:
state = self.backbone.state_dict()
torch.save(state, self.backbone_checkpoint_path)
pass
def training_step(self, batch, batch_idx):
data = batch["data"]
......
from turtle import forward
from torch.nn import Module, Linear
import pytorch_lightning as pl
import torch
......@@ -9,6 +10,7 @@ import math
import scipy
import itertools
from bob.bio.demographics.regularizers import demographic
import os
class LLRPytorchCalibration(torch.nn.Module):
......@@ -34,17 +36,27 @@ class ContrastiveModel(pl.LightningModule):
self,
backbone,
n_demographics,
backbone_checkpoint_path=None,
backbone_checkpoint_file=None,
weight_contrastive_loss=1.0,
weight_calibration_loss=1.0,
demographic_weights=0,
**kwargs,
):
super(ContrastiveModel, self).__init__(
**kwargs,
)
self.backbone_checkpoint_path = backbone_checkpoint_path
self.backbone_checkpoint_file = backbone_checkpoint_file
self.backbone = backbone
# self.loss_fn = torch.nn.CosineEmbeddingLoss(margin=0.0, reduction="none")
self.loss_fn = torch.nn.BCELoss()
# Weights
self.weight_contrastive_loss = weight_contrastive_loss
self.weight_calibration_loss = weight_calibration_loss
self.demographic_weights = demographic_weights
## Losses
self.calibration_loss = torch.nn.BCELoss()
self.contrastive_loss = torch.nn.CosineEmbeddingLoss(margin=0.5)
# One logistic regressor regressor per demographic
self.calibrators = []
......@@ -54,8 +66,54 @@ class ContrastiveModel(pl.LightningModule):
for _ in range(n_demographics):
self.calibrators.append(LLRPytorchCalibration())
# Switchers: flags ensuring each train/freeze toggle is applied only once
self._switcher_train_backbone = True
self._switcher_train_llr = True
def training_epoch_end(self, training_step_outputs):
    """Persist the backbone weights at the end of every training epoch.

    Args:
        training_step_outputs: outputs collected from ``training_step``
            (unused; required by the Lightning hook signature).
    """
    if self.backbone_checkpoint_file is not None:
        # Fix: the original wrapped the path in os.path.join() with a
        # single argument, which is a no-op — save directly to the file.
        torch.save(self.backbone.state_dict(), self.backbone_checkpoint_file)
def switch(self, model, flag):
    """Toggle training mode and gradient tracking for a module.

    Accepts either a single ``torch.nn.Module`` or a list of them
    (handled recursively).  Returns the same object(s) after setting
    ``train(flag)`` and ``requires_grad = flag`` on every parameter.
    """
    if isinstance(model, list):
        # Recurse over each entry and preserve list structure.
        return [self.switch(item, flag) for item in model]

    model.train(flag)
    for param in model.parameters():
        param.requires_grad = flag
    return model
def define_step(self):
    """Decide which sub-networks are trainable for the current epoch.

    Epoch 0 trains only the per-demographic calibrators (backbone
    frozen); from epoch 1 onward only the backbone is trained.  Each
    toggle is applied exactly once, guarded by the ``_switcher_*``
    flags.
    """
    if self.current_epoch < 1:
        # Switching only once
        if self._switcher_train_llr:
            # Training the calibrator: freeze backbone, unfreeze LLRs.
            self.backbone = self.switch(self.backbone, False)
            self.calibrators = self.switch(self.calibrators, True)
            # BUG FIX: the original assigned ``self._switch_train_llr``
            # (misspelled, missing "er"), so the guard flag was never
            # cleared and the switch re-ran on every step of epoch 0.
            self._switcher_train_llr = False
    else:
        if self._switcher_train_backbone:
            self.backbone = self.switch(self.backbone, True)
            self.calibrators = self.switch(self.calibrators, False)
            self._switcher_train_backbone = False
def training_step(self, batch, batch_idx):
_ = self.define_step()
# tensorboard = self.logger.experiment
image_a = batch["data"][0]
image_b = batch["data"][1]
......@@ -66,16 +124,21 @@ class ContrastiveModel(pl.LightningModule):
embedding_a = self.backbone(image_a)
embedding_b = self.backbone(image_b)
# loss = self.loss_fn(embedding_a, embedding_b, label)
new_label = torch.clone(label) # Label from cosine embedding is from -1 to 1
new_label[torch.where(label == 0)[0]] = -1
contrastive_loss = self.contrastive_loss(embedding_a, embedding_b, new_label)
self.log(f"train/contrastive_loss", contrastive_loss)
scores = torch.nn.functional.cosine_similarity(embedding_a, embedding_b)
# For each calibrator
accumulated_loss = 0
calibration_loss = []
for i in range(len(self.calibrators)):
indexes = torch.where(demography == i)[0]
demographic_label = label[indexes]
demographic_scores = scores[indexes]
genuine_scores = demographic_scores[torch.where(demographic_label == 1)[0]]
impostor_scores = demographic_scores[torch.where(demographic_label == 0)[0]]
......@@ -85,74 +148,107 @@ class ContrastiveModel(pl.LightningModule):
if len(genuine_scores) < 1 or len(impostor_scores) < 1:
continue
genuine_scores = torch.sigmoid(
self.calibrators[i](torch.unsqueeze(genuine_scores, 1))
# Score before calibration
(
before_calibration_scores,
before_calibration_labels,
) = self.concatenate_scores(genuine_scores, impostor_scores)
# note: I hate these unnecessary matrix manipulations
acc_demographic = self.compute_llr_accuracy(
before_calibration_scores,
before_calibration_labels,
is_before_calibration=True,
)
impostor_scores = torch.sigmoid(
self.calibrators[i](torch.unsqueeze(impostor_scores, 1))
self.log(f"train/acc_demographic_before_calibration_{i}", acc_demographic)
genuine_scores = self.calibrators[i](torch.unsqueeze(genuine_scores, 1))
impostor_scores = self.calibrators[i](torch.unsqueeze(impostor_scores, 1))
# Score after calibration
(
after_calibration_scores,
after_calibration_labels,
) = self.concatenate_scores(genuine_scores, impostor_scores)
demographic_loss = self.calibration_loss(
after_calibration_scores[:, 0], after_calibration_labels
)
# accumulated_loss += demographic_loss
calibration_loss.append(demographic_loss)
self.log(f"train/demographic_loss_{i}", demographic_loss)
new_scores = torch.cat((genuine_scores, impostor_scores))
new_labels = torch.cat(
(
torch.ones(len(genuine_scores), device=self.my_device),
torch.zeros(len(impostor_scores), device=self.my_device),
)
# note: I hate these unnecessary matrix manipulations
acc_demographic = self.compute_llr_accuracy(
after_calibration_scores[:, 0],
after_calibration_labels,
is_before_calibration=False,
)
self.log(f"train/acc_demographic_after_calibration_{i}", acc_demographic)
demographic_loss = self.loss_fn(new_scores[:, 0], new_labels)
accumulated_loss += demographic_loss
self.log(f"train/demographic_loss_{i}", demographic_loss)
total_loss = (
self.weight_contrastive_loss * contrastive_loss
+ self.weight_calibration_loss * sum(calibration_loss)
)
# Identity loss
# logits_demographic = self.head(embedding, label)
# loss_demographic = self.loss_fn(logits_demographic, label)
self.log("train/total_loss", accumulated_loss)
# acc = (
# sum(
# np.argmax(logits_demographic.cpu().detach().numpy(), axis=1)
# == label.cpu().detach().numpy()
# )
# / label.shape[0]
# )
# self.log("train/acc_demographic", acc)
self.log("train/total_loss", total_loss)
return accumulated_loss
return total_loss
"""
def validation_step(self, val_batch, batch_idx):
def concatenate_scores(self, genuine_scores, impostor_scores):
    """Stack genuine and impostor scores into one tensor with binary labels.

    Returns a tuple ``(scores, labels)`` where the label is 1 for every
    genuine entry and 0 for every impostor entry; labels are created on
    ``self.my_device``.
    """
    ones = torch.ones(len(genuine_scores), device=self.my_device)
    zeros = torch.zeros(len(impostor_scores), device=self.my_device)
    stacked_scores = torch.cat((genuine_scores, impostor_scores))
    stacked_labels = torch.cat((ones, zeros))
    return stacked_scores, stacked_labels
data = val_batch["data"]
# labels = val_batch["label"].cpu().detach().numpy()
labels = val_batch["demography"].cpu().detach().numpy()
def compute_llr_accuracy(self, scores, labels, is_before_calibration=True):
    """Compute classification accuracy of (calibrated) LLR scores.

    Before calibration the scores are raw logits, so the decision
    threshold is 0 (``sigmoid(0) == 0.5``).  After calibration the
    scores are probabilities, thresholded at the classical logistic
    regression value of 0.5.

    Args:
        scores: 1-D tensor of similarity/calibration scores.
        labels: 1-D tensor of binary ground-truth labels (1 genuine,
            0 impostor).
        is_before_calibration: selects the threshold (0 vs 0.5).

    Returns:
        Scalar tensor with the fraction of correct decisions.
    """
    threshold = 0.0 if is_before_calibration else 0.5
    # Vectorized: the original used Python's builtin sum() over the
    # tensor, iterating element-by-element; eq + mean stays in torch.
    return torch.eq(scores > threshold, labels.bool()).float().mean()
# Squared matrix with infiity
predictions = np.ones((n, n)) * np.inf
def validation_step(self, val_batch, batch_idx):
# Filling the upper triangular (without the diagonal) with the pdist
predictions[np.triu_indices(n, k=1)] = pdist
image_a = val_batch["data"][0]
image_b = val_batch["data"][1]
# predicting
predictions = labels[np.argmin(predictions, axis=1)]
# Label is the demography
# demography = val_batch["demography"]
label = val_batch["label"]
accuracy = sum(predictions == labels) / n
self.log("validation/accuracy", accuracy)
"""
embedding_a = self.backbone(image_a)
embedding_b = self.backbone(image_b)
# loss = self.loss_fn(embedding_a, embedding_b, label)
scores = torch.nn.functional.cosine_similarity(embedding_a, embedding_b)
acc = self.compute_llr_accuracy(scores, label)
self.log(f"validation/acc", acc)
def configure_optimizers(self):
# return torch.optim.Adam(
# itertools.chain(
# self.parameters(),
# itertools.chain(*[list(c.parameters()) for c in self.calibrators]),
# ),
# lr=1e-2,
# )
return torch.optim.Adam(
itertools.chain(
self.parameters(),
itertools.chain(*[list(c.parameters()) for c in self.calibrators]),
),
itertools.chain(*[list(c.parameters()) for c in self.calibrators]),
lr=1e-1,
)
......@@ -65,7 +65,7 @@ class OrthogonalityModel(BackboneHeadModel):
optimizer_fn=None,
identity_factor=1.0,
orthogonality_factor=1.0,
backbone_checkpoint_path=None,
backbone_checkpoint_file=None,
demographic_epochs=30,
identity_epochs=50,
**kwargs,
......@@ -93,7 +93,7 @@ class OrthogonalityModel(BackboneHeadModel):
# demographic_epochs + identity_epochs + ortogonality_epochs
# ) # 3. Train ortogonality
self.backbone_checkpoint_path = backbone_checkpoint_path
self.backbone_checkpoint_file = backbone_checkpoint_file
self.last_op = None
# Control the networks that will be updated
......@@ -101,14 +101,6 @@ class OrthogonalityModel(BackboneHeadModel):
self.identity_switch = False
self.orthogonality_switch = False
def training_epoch_end(self, training_step_outputs):
if self.backbone_checkpoint_path:
state = self.identity_backbone.state_dict()
torch.save(state,