diff --git a/bob/pad/face/config/preprocessor/video_face_crop_align_block_patch.py b/bob/pad/face/config/preprocessor/video_face_crop_align_block_patch.py
index aa96bdf5b04834e4e550f122166c10050c1b7a3b..634bbdc980cfde3ecfe7495d920178ea57a0e4ad 100644
--- a/bob/pad/face/config/preprocessor/video_face_crop_align_block_patch.py
+++ b/bob/pad/face/config/preprocessor/video_face_crop_align_block_patch.py
@@ -124,3 +124,8 @@ video_face_crop_align_bw_ir_d_channels_3x128x128 = VideoFaceCropAlignBlockPatch(
     return_multi_channel_flag = True,
     block_patch_preprocessor = _block_patch_128x128)
 
+# This instance is similar to the one above, but returns a **vectorized** patch:
+video_face_crop_align_bw_ir_d_channels_3x128x128_vect = VideoFaceCropAlignBlockPatch(preprocessors = _preprocessors,
+                                                                                     channel_names = _channel_names,
+                                                                                     return_multi_channel_flag = False,
+                                                                                     block_patch_preprocessor = _block_patch_128x128)
diff --git a/doc/mc_autoencoder_pad.rst b/doc/mc_autoencoder_pad.rst
index 7cd6d3bee6947d2d43dd2f4a09f61bce7af5b7fb..4c40ed687e0aa2a91da655313019828d141fdbe6 100644
--- a/doc/mc_autoencoder_pad.rst
+++ b/doc/mc_autoencoder_pad.rst
@@ -105,3 +105,72 @@ To prepare the training data one can use the following command:
 
 Once above script is completed, the MC data suitable for autoencoder fine-tuning is located in the folder ``<PATH_TO_STORE_THE_RESULTS>/preprocessed/``.
 Now the autoencoder can be fine-tuned. Again, the fine-tuning procedure is explained in the **Convolutional autoencoder** section in the documentation of the ``bob.learn.pytorch`` package.
+
+3. Train an MLP using multi-channel autoencoder latent embeddings from WMCA
+=================================================================================
+
+Once the autoencoders are pre-trained and fine-tuned, the latent embeddings can be computed by passing the multi-channel (MC) BW-NIR-D images from the WMCA database through the encoder; see [NGM19]_ for more details. These latent embeddings (feature vectors) are then used to train an MLP classifying the input MC samples into bona-fide or attack classes.
+
+The first step is to register an extractor computing the latent embeddings. To do so, a file defining an instance of the **MultiNetPatchExtractor** class must be created:
+
+.. code-block:: python
+
+    from bob.ip.pytorch_extractor import MultiNetPatchExtractor
+    from bob.bio.video.utils import FrameSelector
+    from bob.bio.video.extractor import Wrapper
+    from torchvision import transforms
+    from bob.learn.pytorch.architectures import ConvAutoencoder
+
+    # transform to be applied to the input patches:
+    TRANSFORM = transforms.Compose([transforms.ToTensor(),
+                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+                                   ])
+
+    # the network returns latent embeddings, to be used as features:
+    NETWORK_AE = ConvAutoencoder(return_latent_embedding = True)
+
+    # use a specific/unique model for each patch. Models are pre-trained on CelebA and fine-tuned on BATL:
+    MODEL_FILE = ["SUBSTITUTE_THE_PATH_TO_PRETRAINED_AE_MODEL"]
+
+    PATCHES_NUM = [0]  # patches to be used in the feature extraction process
+    PATCH_RESHAPE_PARAMETERS = [3, 128, 128]  # reshape vectorized patches to these dimensions before passing them to the network
+
+    _image_extractor = MultiNetPatchExtractor(transform = TRANSFORM,
+                                              network = NETWORK_AE,
+                                              model_file = MODEL_FILE,
+                                              patches_num = PATCHES_NUM,
+                                              patch_reshape_parameters = PATCH_RESHAPE_PARAMETERS,
+                                              color_input_flag = True,
+                                              urls = None,
+                                              archive_extension = '.tar.gz')
+
+    extractor = Wrapper(extractor = _image_extractor,
+                        frame_selector = FrameSelector(selection_style = "all"))
+
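+The snippet below is an optional, quick sanity check of the extractor configured above. It is a minimal sketch, assuming ``MultiNetPatchExtractor`` is callable on a 2D array holding one vectorized patch per row (matching ``PATCHES_NUM`` and ``PATCH_RESHAPE_PARAMETERS``) and that ``MODEL_FILE`` points to an existing model file. It pushes a random vectorized (3 x 128 x 128) patch through the encoder and prints the shape of the resulting latent embedding:
+
+.. code-block:: python
+
+    import numpy as np
+
+    # one random patch, vectorized to 3 * 128 * 128 = 49152 values, mimicking
+    # the output of the "-vect" preprocessor registered in setup.py below:
+    fake_patch = np.random.rand(1, 3 * 128 * 128)
+
+    # compute the latent embedding of the patch selected by PATCHES_NUM:
+    embedding = _image_extractor(fake_patch)
+    print(embedding.shape)
+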
+Suppose the above configuration file is located in the ``bob.pad.face`` package at the following location: ``bob/pad/face/config/extractor/multi_net_patch_extractor.py``. It can then be registered in ``setup.py`` by adding the following string to the ``bob.pad.extractor`` list of registered extractors:
+
+.. code-block:: python
+
+    'multi-net-patch-extractor = bob.pad.face.config.extractor.multi_net_patch_extractor:extractor',
+
+Once the extractor is registered, the latent embeddings (the encoder outputs) can be computed using the following command:
+
+.. code-block:: sh
+
+    ./bin/spoof.py \                                                          # spoof.py is used to extract the embeddings
+    batl-db-rgb-ir-d-grandtest \                                              # WMCA database instance allowing to load RGB-NIR-D channels
+    lbp-svm \                                                                 # required by spoof.py, but unused in this step
+    --preprocessor video-face-crop-align-bw-ir-d-channels-3x128x128-vect \    # entry point defining the preprocessor
+    --extractor multi-net-patch-extractor \                                   # entry point defining the extractor
+    --skip-projector-training --skip-projection --skip-score-computation --allow-missing-files \ # execute the preprocessing and extraction steps only
+    --grid idiap \                                                            # use the grid; for Idiap users only, remove otherwise
+    --sub-directory <PATH_TO_STORE_THE_RESULTS>                               # define your path here
+
+.. note::
+
+    Make sure the ``bob.learn.pytorch`` and ``bob.ip.pytorch_extractor`` packages are installed before running the above command.
+
+Once the above script has completed, the MC latent embeddings to be used for MLP training are located in the folder ``<PATH_TO_STORE_THE_RESULTS>/extracted/``.
+Again, the training procedure is explained in the **MLP** section in the documentation of the ``bob.learn.pytorch`` package.
diff --git a/setup.py b/setup.py
index 2b2339db588cfdf150d9712353b824b0bc80eb12..a1824b9f3e5dbc42f4e06a3b8e1b356b331a6e49 100644
--- a/setup.py
+++ b/setup.py
@@ -123,6 +123,7 @@ setup(
             'bw-face-detect-mtcnn = bob.pad.face.config.preprocessor.video_face_crop:bw_face_detect_mtcnn', # detect faces locally, return BW image
             'rgb-face-detect-check-quality-128x128 = bob.pad.face.config.preprocessor.face_feature_crop_quality_check:face_feature_0_128x128_crop_rgb', # detect faces locally replacing database annotations, also check face quality by trying to detect the eyes in cropped face.
             'video-face-crop-align-bw-ir-d-channels-3x128x128 = bob.pad.face.config.preprocessor.video_face_crop_align_block_patch:video_face_crop_align_bw_ir_d_channels_3x128x128', # Extract a BW-NIR-D patch of size (3 x 128 x 128) containing aligned face
+            'video-face-crop-align-bw-ir-d-channels-3x128x128-vect = bob.pad.face.config.preprocessor.video_face_crop_align_block_patch:video_face_crop_align_bw_ir_d_channels_3x128x128_vect', # Extract a **vectorized** BW-NIR-D patch of size (3 x 128 x 128) containing aligned face
         ],
 
         # registered extractors:
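To verify the registration after re-installing the package, one can list the matching entry points. The check below is a minimal sketch, assuming the preprocessors are registered under the ``bob.pad.preprocessor`` entry-point group, by analogy with the ``bob.pad.extractor`` group used for the extractor above:

.. code-block:: python

    import pkg_resources

    # print the names of the preprocessor entry points matching the new patch size:
    for entry_point in pkg_resources.iter_entry_points("bob.pad.preprocessor"):
        if "3x128x128" in entry_point.name:
            print(entry_point.name)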