diff --git a/bob/pad/face/config/deep_pix_bis.py b/bob/pad/face/config/deep_pix_bis.py new file mode 100644 index 0000000000000000000000000000000000000000..19eda97c778acd4fae862f320f12c183c6ab5a00 --- /dev/null +++ b/bob/pad/face/config/deep_pix_bis.py @@ -0,0 +1,52 @@ +from sklearn.pipeline import Pipeline + +import bob.pipelines as mario + +from bob.bio.face.preprocessor import FaceCrop +from bob.bio.face.utils import get_default_cropped_positions +from bob.bio.video.transformer import VideoWrapper +from bob.pad.face.deep_pix_bis import DeepPixBisClassifier +from bob.pad.face.transformer import VideoToFrames + +database = globals().get("database") +if database is not None: + annotation_type = database.annotation_type + fixed_positions = database.fixed_positions +else: + annotation_type = None + fixed_positions = None + + +# Preprocessor # +cropped_image_size = (224, 224) +cropped_positions = get_default_cropped_positions( + "pad", cropped_image_size, annotation_type +) +cropper = FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=fixed_positions, + dtype="uint8", + annotator="mtcnn", +) + +preprocessor = VideoWrapper(cropper) +preprocessor = mario.wrap( + ["sample"], + preprocessor, + transform_extra_arguments=(("annotations", "annotations"),), +) + +# Classifier # +classifier = DeepPixBisClassifier(model_file="oulunpu-p1") +classifier = mario.wrap(["sample"], classifier) + + +pipeline = Pipeline( + [ + ("preprocessor", preprocessor), + ("video_to_frames", VideoToFrames()), + ("classifier", classifier), + ] +) diff --git a/bob/pad/face/config/svm_frames.py b/bob/pad/face/config/svm_frames.py index 64a672a8938a682926d697a2f208c8e5557b5e7c..68a9ea2d15120a914fd7bec1d88ef9616444e23c 100644 --- a/bob/pad/face/config/svm_frames.py +++ b/bob/pad/face/config/svm_frames.py @@ -6,11 +6,10 @@ import bob.pipelines as mario from bob.pad.face.transformer import VideoToFrames -preprocessor = globals().get("preprocessor") -extractor = globals().get("extractor") +preprocessor = globals()["preprocessor"] +extractor = globals()["extractor"] # Classifier # -frame_cont_to_array = VideoToFrames() param_grid = [ { @@ -29,10 +28,13 @@ classifier = mario.wrap( ) -# Pipeline # +# we put video_to_frames and classifier together in a pipeline +# so that the output of video_to_frames is not checkpointed! 
frames_classifier = Pipeline( - [("frame_cont_to_array", frame_cont_to_array), ("classifier", classifier)] + [("video_to_frames", VideoToFrames()), ("classifier", classifier)] ) + +# Pipeline # pipeline = Pipeline( [ ("preprocessor", preprocessor), diff --git a/bob/pad/face/deep_pix_bis.py b/bob/pad/face/deep_pix_bis.py new file mode 100644 index 0000000000000000000000000000000000000000..23abe6eefebdffcd8b6d4601dd9bfb7c8c4699a1 --- /dev/null +++ b/bob/pad/face/deep_pix_bis.py @@ -0,0 +1,281 @@ +import logging + +import numpy as np +import torch +import torchvision.transforms as vision_transforms + +from sklearn.base import BaseEstimator, ClassifierMixin +from torch import nn +from torchvision import models + +from bob.extension.download import get_file +from bob.io.image import to_matplotlib + +logger = logging.getLogger(__name__) + + +DEEP_PIX_BIS_PRETRAINED_MODELS = { + "oulunpu-p1": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_1_model_0_0-24844429.pth" + ], + "oulunpu-p2": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_2_model_0_0-4aae2f3a.pth" + ], + "oulunpu-p3-1": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_3_1_model_0_0-f0e70cf3.pth" + ], + "oulunpu-p3-2": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_3_2_model_0_0-92594797.pth" + ], + "oulunpu-p3-3": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_3_3_model_0_0-71e18149.pth" + ], + "oulunpu-p3-4": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_3_4_model_0_0-d7f666e5.pth" + ], + "oulunpu-p3-5": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_3_5_model_0_0-fc40ba69.pth" + ], + "oulunpu-p3-6": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_3_6_model_0_0-123a6c92.pth" + ], + "oulunpu-p4-1": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_4_1_model_0_0-5f8dc7cf.pth" + ], + "oulunpu-p4-2": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_4_2_model_0_0-168f2644.pth" + ], + "oulunpu-p4-3": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_4_3_model_0_0-db57e3b5.pth" + ], + "oulunpu-p4-4": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_4_4_model_0_0-e999b7e8.pth" + ], + "oulunpu-p4-5": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_4_5_model_0_0-dcd13b8b.pth" + ], + "oulunpu-p4-6": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_OULU_Protocol_4_6_model_0_0-96a1ab92.pth" + ], + "replaymobile": [ + "http://www.idiap.ch/software/bob/data/bob/bob.pad.face/deep_pix_bis_RM_grandtest_model_0_0-6761ca7e.pth" + ], +} +"A dictionary with the url paths to pre-trained weights of the DeepPixBis model." + + +class DeepPixBiS(nn.Module): + """The class defining Deep Pixelwise Binary Supervision for Face Presentation + Attack Detection: + + Reference: Anjith George and Sébastien Marcel. "Deep Pixel-wise Binary Supervision for + Face Presentation Attack Detection." In 2019 International Conference on Biometrics (ICB).IEEE, 2019. + + Attributes + ---------- + pretrained: bool + If set to `True` uses the pretrained DenseNet model as the base. If set to `False`, the network + will be trained from scratch. 
+ """ + + def __init__(self, pretrained=True, **kwargs): + """ + Parameters + ---------- + pretrained: bool + If set to `True` uses the pretrained densenet model as the base. Else, it uses the default network + """ + super().__init__(**kwargs) + + dense = models.densenet161(pretrained=pretrained) + + features = list(dense.features.children()) + + self.enc = nn.Sequential(*features[0:8]) + + self.dec = nn.Conv2d(384, 1, kernel_size=1, padding=0) + + self.linear = nn.Linear(14 * 14, 1) + + def forward(self, x): + """Propagate data through the network + + Parameters + ---------- + img: :py:class:`torch.Tensor` + The data to forward through the network. Expects RGB image of size 3x224x224 + + Returns + ------- + dec: :py:class:`torch.Tensor` + Binary map of size 1x14x14 + op: :py:class:`torch.Tensor` + Final binary score. + + """ + enc = self.enc(x) + + dec = self.dec(enc) + + dec = nn.Sigmoid()(dec) + + dec_flat = dec.view(-1, 14 * 14) + + op = self.linear(dec_flat) + + op = nn.Sigmoid()(op) + + return dec, op + + +class DeepPixBisClassifier(BaseEstimator, ClassifierMixin): + """The class implementing the DeepPixBiS score computation""" + + def __init__( + self, + model_file=None, + transforms=None, + scoring_method="pixel_mean", + device=None, + threshold=0.8, + **kwargs, + ): + + """Init method + + Parameters + ---------- + model_file: str + The path of the trained PAD network to load or one of the keys to :py:attr:`DEEP_PIX_BIS_PRETRAINED_MODELS` + transforms: :py:mod:`torchvision.transforms` + Tranform to be applied on the image + scoring_method: str + The scoring method to be used to get the final score, + available methods are ['pixel_mean','binary','combined']. + threshold: float + The threshold to be used to binarize the output of the DeepPixBiS model. + This is not used in the normal bob.pad.base pipeline. + """ + super().__init__(**kwargs) + + if transforms is None: + transforms = vision_transforms.Compose( + [ + vision_transforms.ToTensor(), + vision_transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + ] + ) + + # model + self.transforms = transforms + self.model = None + self.scoring_method = scoring_method.lower() + if self.scoring_method not in ("pixel_mean", "binary", "combined"): + raise ValueError( + "Scoring method {} is not implemented.".format( + self.scoring_method + ) + ) + self.device = torch.device( + device or "cuda" if torch.cuda.is_available() else "cpu" + ) + self.threshold = threshold + + logger.debug( + "Scoring method is : {}".format(self.scoring_method.upper()) + ) + + if model_file in DEEP_PIX_BIS_PRETRAINED_MODELS: + model_urls = DEEP_PIX_BIS_PRETRAINED_MODELS[model_file] + filename = model_urls[0].split("/")[-1] + file_hash = ( + model_urls[0].split("/")[-1].split("-")[-1].split(".")[0] + ) + model_file = get_file( + filename, + model_urls, + cache_subdir="models", + file_hash=file_hash, + extract=False, + ) + + logger.debug("Using pretrained model {}".format(model_file)) + self.model_file = model_file + + def load_model(self): + if self.model is not None: + return + + cp = torch.load( + self.model_file, map_location=lambda storage, loc: storage + ) + + self.model = DeepPixBiS(pretrained=False) + self.model.load_state_dict(cp["state_dict"]) + self.place_model_on_device() + self.model.eval() + logger.debug("Loaded the pretrained PAD model") + + def predict_proba(self, images): + """Scores face images for PAD + + Parameters + ---------- + image : 3D :py:class:`numpy.ndarray` + The image to extract the score from. 
Its size must be 3x224x224;
+
+        Returns
+        -------
+        output : float
+            The output score is close to 1 for bonafide and 0 for PAs.
+        """
+        self.load_model()
+
+        tensor_images = []
+        for img in images:
+            img = to_matplotlib(img)
+            with torch.no_grad():
+                img = self.transforms(img)
+            tensor_images.append(img)
+
+        images = tensor_images = torch.stack(tensor_images).to(self.device)
+
+        with torch.no_grad():
+            outputs = self.model.forward(images)
+
+        output_pixel = outputs[0].cpu().detach().numpy().mean(axis=(1, 2, 3))
+        output_binary = outputs[1].cpu().detach().numpy().mean(axis=1)
+
+        score = {
+            "pixel_mean": output_pixel,
+            "binary": output_binary,
+            "combined": (output_binary + output_pixel) / 2,
+        }[self.scoring_method]
+
+        return score
+
+    def predict(self, X):
+        scores = self.predict_proba(X)
+        return np.array(scores > self.threshold, dtype=int)
+
+    def fit(self, X, y=None):
+        """No training required for this model"""
+        return self
+
+    def __getstate__(self):
+        # Handling unpicklable objects
+        d = self.__dict__.copy()
+        d["model"] = None
+        return d
+
+    def _more_tags(self):
+        return {"requires_fit": False}
+
+    def place_model_on_device(self):
+        if self.model is not None:
+            self.model.to(self.device)
diff --git a/bob/pad/face/test/dummy/database.py b/bob/pad/face/test/dummy/database.py
index 00cbeec41a817f115dea5f07dc00c7fc0f1c03c4..745cd6a838b846af3e4e7d4e83170dab5e8af8ef 100644
--- a/bob/pad/face/test/dummy/database.py
+++ b/bob/pad/face/test/dummy/database.py
@@ -8,7 +8,7 @@ from bob.bio.base.database.legacy import (
     convert_names_to_lowlevel,
 )
 from bob.bio.video import VideoLikeContainer
-from bob.pad.base.pipelines.vanilla_pad.abstract_classes import Database
+from bob.pad.base.pipelines.abstract_classes import Database
 from bob.pipelines import DelayedSample
diff --git a/bob/pad/face/transformer/VideoToFrames.py b/bob/pad/face/transformer/VideoToFrames.py
index 64f10e8f42dc30eba0a858332e634fd4c77f73d1..b9982e90be9a78c4e34009d01cb5a3d34272d9e6 100644
--- a/bob/pad/face/transformer/VideoToFrames.py
+++ b/bob/pad/face/transformer/VideoToFrames.py
@@ -23,18 +23,24 @@ class VideoToFrames(TransformerMixin, BaseEstimator):
         for frame, frame_id in zip(video, video.indices):
             if frame is None:
                 continue
+            kw = (
+                {"key": f"{sample.key}_{frame_id}"}
+                if hasattr(sample, "key")
+                else {}
+            )
             new_sample = mario.Sample(
                 frame,
                 frame_id=frame_id,
                 annotations=annotations.get(str(frame_id)),
                 parent=sample,
+                **kw,
             )
             output.append(new_sample)
         return output
 
-    def fit(self, X, y=None, **fit_params):
+    def fit(self, X, y=None):
         return self
 
     def _more_tags(self):
-        return {"requires_fit": False}
+        return {"requires_fit": False, "bob_checkpoint_features": False}
diff --git a/conda/meta.yaml b/conda/meta.yaml
index f22cdca655dee11d0d4d7d6c9632cd994027d124..661fa2269c2c46852dbe5af5b4eaac5254b6961d 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -32,6 +32,7 @@ requirements:
     - scikit-learn {{ scikit_learn }}
     - scikit-image {{ scikit_image }}
     - imageio-ffmpeg {{ imageio_ffmpeg }}
+    - pytorch {{ pytorch }}
   run:
     - python
     - setuptools
@@ -39,6 +40,7 @@ requirements:
     - {{ pin_compatible('scikit-learn', min_pin='x.x') }}
     - {{ pin_compatible('scikit-image') }}
     - {{ pin_compatible('imageio-ffmpeg') }}
+    - {{ pin_compatible('pytorch') }}
 
 test:
   imports:
diff --git a/doc/baselines.rst b/doc/baselines.rst
index 17dad0aa23d6ba1dc1d6085792ce37f479235521..dfd774713082442cffd071340878d5c1cd85fc7a 100644
--- a/doc/baselines.rst +++ b/doc/baselines.rst @@ -14,18 +14,18 @@ algorithms implemented in ``bob.pad.face``. Running Baseline Experiments ---------------------------- -To run the baseline PAD experiments, the ``bob pad vanilla-pad`` command is used. +To run the baseline PAD experiments, the ``bob pad run-pipeline`` command is used. To see the description of the command, you can type in the console: .. code-block:: sh - $ bob pad vanilla-pad --help + $ bob pad run-pipeline --help -This command is explained in more detail in :ref:`bob.pad.base <bob.pad.base.vanilla_pad_features>`. +This command is explained in more detail in :ref:`bob.pad.base <bob.pad.base.features>`. Usually, it is a good idea to have at least verbose level 2 (i.e., calling -``bob pad vanilla-pad --verbose --verbose``, or the short version -``bob pad vanilla-pad -vv``). +``bob pad run-pipeline --verbose --verbose``, or the short version +``bob pad run-pipeline -vv``). .. note:: **Running in Parallel** diff --git a/doc/index.rst b/doc/index.rst index 27fc0ff46935ee8c2e3da4246b9e8ec76f5e14ec..5c4200195e802638bdefa466eee700210cff16a2 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -7,7 +7,7 @@ Library for Facial Presentation Attack Detection (PAD) ======================================================== The Facial Presentation Attack Detection Library is an open-source tool consisting of a -series of plugins for bob.pad.base_, our open-source biometric recognition +series of plugins for bob.pad.base_, our open-source presentation attack detection platform. As a result, it is fully extensible using bob.pad.base_ documented types and techniques. Please refer to the manual of that package for a thorough introduction. In this guide, we focus on details concerning diff --git a/doc/resources.rst b/doc/resources.rst index b988ac996550c8171dfff131b10eb0024b76eb43..e8e7925e44e948a4bd5b27211d12eaaa6fad34e4 100644 --- a/doc/resources.rst +++ b/doc/resources.rst @@ -19,7 +19,7 @@ Databases These configuration files/resources contain parameters of available databases. The configuration files contain at least the following arguments of the -``bob pad vanilla-pad`` command: +``bob pad run-pipeline`` command: * ``database`` * ``protocol`` @@ -54,6 +54,6 @@ Available face PAD systems These configuration files/resources contain parameters of available face PAD systems/algorithms. The configuration files contain at least the following arguments of the -``bob pad vanilla-pad`` command: +``bob pad run-pipeline`` command: * ``pipeline`` containing zero, one, or more Transformers and one Classifier diff --git a/requirements.txt b/requirements.txt index b9edf490978c937b1533015aedb6860da3b21fdb..3bd51dbb637617381a2d369faf6d780df623d362 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ bob.bio.video scikit-learn scikit-image imageio-ffmpeg +pytorch
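
Reviewer note: a quick smoke test for the new classifier added in bob/pad/face/deep_pix_bis.py. This is an illustrative sketch only, not part of the patch: it loads the "replaymobile" weights (one of the keys in DEEP_PIX_BIS_PRETRAINED_MODELS, downloaded and cached on first use) and feeds random arrays that stand in for the 224x224 RGB face crops the FaceCrop + VideoToFrames stages of config/deep_pix_bis.py would normally produce:

    import numpy as np

    from bob.pad.face.deep_pix_bis import DeepPixBisClassifier

    # Illustrative: the "replaymobile" weights are fetched via get_file on first use.
    classifier = DeepPixBisClassifier(
        model_file="replaymobile", scoring_method="pixel_mean"
    )

    # Random frames stand in for bob-format RGB crops of shape (3, 224, 224), uint8.
    frames = [
        np.random.randint(0, 256, (3, 224, 224), dtype=np.uint8) for _ in range(4)
    ]

    # One score per frame; close to 1 for bona fide, close to 0 for attacks.
    scores = classifier.predict_proba(frames)
    print(scores.shape)  # (4,)

In the pipeline itself the classifier is wrapped with mario.wrap(["sample"], ...) and fed by VideoToFrames, exactly as in bob/pad/face/config/deep_pix_bis.py above.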
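
Reviewer note: the VideoToFrames change also gives every extracted frame a unique key ("<parent key>_<frame_id>") whenever the parent video sample carries one, which keeps per-frame outputs unambiguous now that checkpointing of this step is disabled. A minimal sketch, assuming the VideoLikeContainer(data, indices) constructor and mario.Sample keyword attributes behave as they are used elsewhere in this package (the sample key below is hypothetical):

    import numpy as np

    import bob.pipelines as mario

    from bob.bio.video import VideoLikeContainer
    from bob.pad.face.transformer import VideoToFrames

    # Two dummy frames in a video-like container, keyed like a real database sample.
    frames = np.zeros((2, 3, 8, 8), dtype=np.uint8)
    video = VideoLikeContainer(frames, indices=[0, 1])
    sample = mario.Sample(video, key="client01_video05", annotations={})

    frame_samples = VideoToFrames().transform([sample])
    print([s.key for s in frame_samples])
    # expected: ['client01_video05_0', 'client01_video05_1']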