From 27898ca94737683c3c479c0d64a4580da29001a3 Mon Sep 17 00:00:00 2001 From: ageorge Date: Fri, 29 Mar 2019 11:51:29 +0100 Subject: [PATCH 1/4] Added a multi-channel extension of DeepPixBiS and its extractor --- .../pytorch/architectures/MCDeepPixBiS.py | 102 +++++++++++++++ bob/learn/pytorch/architectures/__init__.py | 1 + .../pytorch/extractor/image/MCDeepPixBiS.py | 119 ++++++++++++++++++ bob/learn/pytorch/extractor/image/__init__.py | 1 + bob/learn/pytorch/test/test.py | 17 +++ 5 files changed, 240 insertions(+) create mode 100644 bob/learn/pytorch/architectures/MCDeepPixBiS.py create mode 100644 bob/learn/pytorch/extractor/image/MCDeepPixBiS.py diff --git a/bob/learn/pytorch/architectures/MCDeepPixBiS.py b/bob/learn/pytorch/architectures/MCDeepPixBiS.py new file mode 100644 index 0000000..fea3fb1 --- /dev/null +++ b/bob/learn/pytorch/architectures/MCDeepPixBiS.py @@ -0,0 +1,102 @@ +import torch +from torch import nn +from torchvision import models +import numpy as np + + +class MCDeepPixBiS(nn.Module): + + """ The class defining Multi-Channel Deep Pixelwise Binary Supervision for Face Presentation + Attack Detection: + + This extends the following paper to multi-channel/ multi-spectral images with cross modal pretraining. + + Reference: Anjith George and Sébastien Marcel. "Deep Pixel-wise Binary Supervision for + Face Presentation Attack Detection." In 2019 International Conference on Biometrics (ICB).IEEE, 2019. + + The initialization uses `Cross modality pre-training` idea from the following paper: + + Wang L, Xiong Y, Wang Z, Qiao Y, Lin D, Tang X, Van Gool L. Temporal segment networks: + Towards good practices for deep action recognition. InEuropean conference on computer + vision 2016 Oct 8 (pp. 20-36). Springer, Cham. + + + Attributes + ---------- + pretrained: bool + If set to `True` uses the pretrained DenseNet model as the base. If set to `False`, the network + will be trained from scratch. + default: True + num_channels: int + Number of channels in the input. + """ + + def __init__(self, pretrained=True, num_channels=4): + + """ Init function + + Parameters + ---------- + pretrained: bool + If set to `True` uses the pretrained densenet model as the base. Else, it uses the default network + default: True + num_channels: int + Number of channels in the input. + """ + super(MCDeepPixBiS, self).__init__() + + dense = models.densenet161(pretrained=pretrained) + + features = list(dense.features.children()) + + temp_layer = features[0] + + # No bias in this architecture + + mean_weight = np.mean(temp_layer.weight.data.detach().numpy(),axis=1) # for 96 filters + + new_weight = np.zeros((96,num_channels,7,7)) + + for i in range(num_channels): + new_weight[:,i,:,:]=mean_weight + + features[0]=nn.Conv2d(num_channels, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + + features[0].weight.data = torch.Tensor(new_weight) + + self.enc = nn.Sequential(*features[0:8]) + + self.dec=nn.Conv2d(384, 1, kernel_size=1, padding=0) + + self.linear=nn.Linear(14*14,1) + + + def forward(self, x): + """ Propagate data through the network + + Parameters + ---------- + img: :py:class:`torch.Tensor` + The data to forward through the network. Expects Multi-channel images of size num_channelsx224x224 + + Returns + ------- + dec: :py:class:`torch.Tensor` + Binary map of size 1x14x14 + op: :py:class:`torch.Tensor` + Final binary score. + + """ + enc = self.enc(x) + + dec=self.dec(enc) + + dec=nn.Sigmoid()(dec) + + dec_flat=dec.view(-1,14*14) + + op=self.linear(dec_flat) + + op=nn.Sigmoid()(op) + + return dec,op diff --git a/bob/learn/pytorch/architectures/__init__.py b/bob/learn/pytorch/architectures/__init__.py index 966a5ec..a487143 100644 --- a/bob/learn/pytorch/architectures/__init__.py +++ b/bob/learn/pytorch/architectures/__init__.py @@ -8,6 +8,7 @@ from .MCCNNv2 import MCCNNv2 from .FASNet import FASNet from .DeepMSPAD import DeepMSPAD from .DeepPixBiS import DeepPixBiS +from .MCDeepPixBiS import MCDeepPixBiS from .DCGAN import DCGAN_generator from .DCGAN import DCGAN_discriminator diff --git a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py new file mode 100644 index 0000000..8c1a629 --- /dev/null +++ b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py @@ -0,0 +1,119 @@ +import numpy as np +import torch +from torch.autograd import Variable + +import torchvision.transforms as transforms + +from bob.learn.pytorch.architectures import DeepPixBiS +from bob.bio.base.extractor import Extractor + +import logging +logger = logging.getLogger("bob.learn.pytorch") + +class MCDeepPixBiSExtractor(Extractor): + """ The class implementing the score computation for MCDeepPixBiS architecture. + + Attributes + ---------- + network: :py:class:`torch.nn.Module` + The network architecture + transforms: :py:mod:`torchvision.transforms` + The transform from numpy.array to torch.Tensor + scoring_method: str + The scoring method to be used to get the final score, + available methods are ['pixel_mean','binary','combined']. + + """ + + def __init__(self, transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]), model_file=None, num_channels=8, scoring_method='pixel_mean'): + + + """ Init method + + Parameters + ---------- + model_file: str + The path of the trained PAD network to load + transforms: :py:mod:`torchvision.transforms` + Tranform to be applied on the image + scoring_method: str + The scoring method to be used to get the final score, + available methods are ['pixel_mean','binary','combined']. + + """ + + Extractor.__init__(self, skip_extractor_training=True) + + # model + self.transforms = transforms + self.scoring_method = scoring_method + self.num_channels =num_channels + self.network = MCDeepPixBiS(pretrained=False, num_channels=self.num_channels) + self.available_scoring_methods=['pixel_mean','binary','combined'] + + logger.debug('Scoring method is : {}'.format(self.scoring_method.upper())) + + if model_file is None: + # do nothing (used mainly for unit testing) + logger.debug("No pretrained file provided") + pass + else: + + + # With the new training + logger.debug('Starting to load the pretrained PAD model') + + try: + cp = torch.load(model_file) + except: + try: + cp= torch.load(model_file,map_location=lambda storage,loc:storage) + except: + raise ValueError('Failed to load the model file : {}'.format(model_file)) + + if 'state_dict' in cp: + self.network.load_state_dict(cp['state_dict']) + elif: ## check this part + self.network.load_state_dict(cp) + else: + raise ValueError('Failed to load the state_dict for model file: {}'.format(model_file)) + + logger.debug('Loaded the pretrained PAD model') + + self.network.eval() + + def __call__(self, image): + """ Extract features from an image + + Parameters + ---------- + image : 3D :py:class:`numpy.ndarray` + The image to extract the score from. Its size must be num_channelsx224x224; + + Returns + ------- + output : float + The extracted feature is a scalar values ~1 for bonafide and ~0 for PAs + + """ + + input_image = np.rollaxis(np.rollaxis(image, 2),2) # changes to 128x128xnum_channels + input_image = self.transforms(input_image) + input_image = input_image.unsqueeze(0) + + output = self.network.forward(Variable(input_image)) + output_pixel = output[0].data.numpy().flatten() + output_binary = output[1].data.numpy().flatten() + + if self.scoring_method=='pixel_mean': + score=np.mean(output_pixel) + elif self.scoring_method=='binary': + score=np.mean(output_binary) + elif self.scoring_method=='combined': + score= (np.mean(output_pixel)+np.mean(output_binary))/2.0 + else: + raise ValueError('Scoring method {} is not implemented.'.format(self.scoring_method)) + + # output is a scalar score + + return score \ No newline at end of file diff --git a/bob/learn/pytorch/extractor/image/__init__.py b/bob/learn/pytorch/extractor/image/__init__.py index d019435..7f585c3 100644 --- a/bob/learn/pytorch/extractor/image/__init__.py +++ b/bob/learn/pytorch/extractor/image/__init__.py @@ -4,6 +4,7 @@ from .LightCNN29v2 import LightCNN29v2Extractor from .MCCNN import MCCNNExtractor from .MCCNNv2 import MCCNNv2Extractor from .FASNet import FASNetExtractor +from .MCDeepPixBiS import MCDeepPixBiSExtractor __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/learn/pytorch/test/test.py b/bob/learn/pytorch/test/test.py index 08e3a46..7a2de3b 100644 --- a/bob/learn/pytorch/test/test.py +++ b/bob/learn/pytorch/test/test.py @@ -95,6 +95,15 @@ def test_architectures(): assert output[0].shape == torch.Size([1, 1, 14, 14]) assert output[1].shape == torch.Size([1, 1]) + #MCDeepPixBiS + a = numpy.random.rand(1, 8, 224, 224).astype("float32") + t = torch.from_numpy(a) + from ..architectures import MCDeepPixBiS + net = MCDeepPixBiS(pretrained=False,num_channels=8) + output = net.forward(t) + assert output[0].shape == torch.Size([1, 1, 14, 14]) + assert output[1].shape == torch.Size([1, 1]) + # DCGAN d = numpy.random.rand(1, 3, 64, 64).astype("float32") t = torch.from_numpy(d) @@ -437,3 +446,11 @@ def test_extractors(): data = numpy.random.rand(3, 224, 224).astype("uint8") output = extractor(data) assert output.shape[0] == 1 + + # MCDeepPixBiS + from ..extractor.image import MCDeepPixBiSExtractor + extractor = MCDeepPixBiSExtractor(num_channels=8, scoring_method='pixel_mean') + # this architecture expects multi-channel images of size num_channelsx224x224 + data = numpy.random.rand(8, 224, 224).astype("uint8") + output = extractor(data) + assert output.shape[0] == 1 \ No newline at end of file -- GitLab From 00b15d32233cd0bb333d4029cea0707f8ceed447 Mon Sep 17 00:00:00 2001 From: ageorge Date: Fri, 29 Mar 2019 12:14:35 +0100 Subject: [PATCH 2/4] small fix --- bob/learn/pytorch/extractor/image/MCDeepPixBiS.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py index 8c1a629..b2358b1 100644 --- a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py +++ b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py @@ -59,22 +59,16 @@ class MCDeepPixBiSExtractor(Extractor): pass else: - # With the new training logger.debug('Starting to load the pretrained PAD model') try: cp = torch.load(model_file) except: - try: - cp= torch.load(model_file,map_location=lambda storage,loc:storage) - except: - raise ValueError('Failed to load the model file : {}'.format(model_file)) + raise ValueError('Failed to load the model file : {}'.format(model_file)) if 'state_dict' in cp: self.network.load_state_dict(cp['state_dict']) - elif: ## check this part - self.network.load_state_dict(cp) else: raise ValueError('Failed to load the state_dict for model file: {}'.format(model_file)) -- GitLab From 95a6b40b013681836e8d3b616bd238919fb2d9cc Mon Sep 17 00:00:00 2001 From: ageorge Date: Fri, 29 Mar 2019 12:45:43 +0100 Subject: [PATCH 3/4] small fix --- bob/learn/pytorch/extractor/image/MCDeepPixBiS.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py index b2358b1..235afbc 100644 --- a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py +++ b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py @@ -4,7 +4,7 @@ from torch.autograd import Variable import torchvision.transforms as transforms -from bob.learn.pytorch.architectures import DeepPixBiS +from bob.learn.pytorch.architectures import MCDeepPixBiS from bob.bio.base.extractor import Extractor import logging -- GitLab From 6d142c3b74358fbfdac3efe706699a4b1f0a13e2 Mon Sep 17 00:00:00 2001 From: ageorge Date: Fri, 29 Mar 2019 15:59:00 +0100 Subject: [PATCH 4/4] Changed default transform and fixes --- bob/learn/pytorch/extractor/image/MCDeepPixBiS.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py index 235afbc..1f8a28e 100644 --- a/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py +++ b/bob/learn/pytorch/extractor/image/MCDeepPixBiS.py @@ -25,7 +25,7 @@ class MCDeepPixBiSExtractor(Extractor): """ - def __init__(self, transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]), model_file=None, num_channels=8, scoring_method='pixel_mean'): + def __init__(self, transforms = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.5], std=[0.225])]), model_file=None, num_channels=8, scoring_method='pixel_mean'): """ Init method @@ -91,7 +91,7 @@ class MCDeepPixBiSExtractor(Extractor): """ - input_image = np.rollaxis(np.rollaxis(image, 2),2) # changes to 128x128xnum_channels + input_image = np.rollaxis(np.rollaxis(image, 2),2) # changes to 224x224xnum_channels input_image = self.transforms(input_image) input_image = input_image.unsqueeze(0) @@ -110,4 +110,4 @@ class MCDeepPixBiSExtractor(Extractor): # output is a scalar score - return score \ No newline at end of file + return np.reshape(score,(1,-1)) -- GitLab