Commit 38a5586f authored by Anjith GEORGE

Added trainer and trainer script

parent c2a2f4b8
from torchvision import transforms

from bob.learn.pytorch.architectures import MCCNN
from bob.learn.pytorch.datasets import DataFolder

from bob.pad.face.database import BatlPadDatabase

# =============================================================================
# Load the dataset
#
# The steps in initializing the dataset are as follows:
#
# 1. Initialize a database instance, with the protocol, groups and number of
#    frames (currently for the ones in 'bob.pad.face'), and point
#    'data_folder_train' to the preprocessed directory.
#    Note: here we assume that the data has already been preprocessed and
#    dumped to the location pointed to by 'data_folder_train'.
# 2. Specify the transform to be used on the images; it can be an instance of
#    `torchvision.transforms.Compose` or a custom function.
# 3. Initialize the `DataFolder` class with the database instance and all
#    other parameters; this dataset instance is used in the trainer class.
# 4. Initialize the network architecture with the required arguments.

# NOTE(review): the following three values were truncated in this copy of the
# file; placeholders are provided -- confirm against the original config.
data_folder_train = '<path-to-preprocessed-data>'  # TODO confirm
frames = 50        # number of frames selected per video -- TODO confirm
extension = '.h5'  # extension of the preprocessed files -- TODO confirm

train_groups = ['train']  # only 'train' group is used for training the network
protocols = "grandtest-color*depth*infrared*thermal-{}".format(frames)  # makeup is excluded anyway here

img_transform_train = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # Add p=0.5 later
    transforms.ToTensor(),
])

# NOTE(review): the argument list of this call was truncated in this copy;
# only 'landmark_detect_method' and 'exclude_pai_all_sets' are certain.
bob_hldi_instance_train = BatlPadDatabase(
    protocol=protocols,
    original_directory=data_folder_train,
    original_extension=extension,
    landmark_detect_method="mtcnn",  # detect annotations using mtcnn
    exclude_pai_all_sets=True,  # exclude makeup from all the sets, which is the default behavior for grandtest protocol
)

# NOTE(review): only 'data_folder' and 'purposes' survived extraction; the
# remaining keyword arguments are reconstructed -- confirm.
dataset = DataFolder(
    data_folder=data_folder_train,
    transform=img_transform_train,
    extension=extension,
    bob_hldi_instance=bob_hldi_instance_train,
    groups=train_groups,
    protocol=protocols,
    purposes=['real', 'attack'],
)

# Load the architecture
# NOTE(review): four channels inferred from the color*depth*infrared*thermal
# protocol above -- confirm.
network = MCCNN(num_channels=4)

# Specify other training parameters
batch_size = 64
seed = 3
output_dir = 'training_mccn'
use_gpu = False
verbose = 2
Note: Running on GPU

jman submit --queue gpu \
--name mccnn \
--log-dir /idiap/user/ageorge/WORK/COMMON_ENV_PAD_BATL_DB/training_mccn/logs/ \
--environment="PYTHONUNBUFFERED=1" -- \
./bin/train_mccnn.py \
/idiap/user/ageorge/WORK/COMMON_ENV_PAD_BATL_DB/src/bob.learn.pytorch/bob/learn/pytorch/config/mccnn/<config-file>.py --use-gpu -vvv

Note: Running on CPU

./bin/train_mccnn.py \
/idiap/user/ageorge/WORK/COMMON_ENV_PAD_BATL_DB/src/bob.learn.pytorch/bob/learn/pytorch/config/mccnn/<config-file>.py -vvv
#!/usr/bin/env python
# encoding: utf-8
""" Train a MCCNN for muti-channel face PAD
%(prog)s <configuration>
[--model=<string>] [--batch-size=<int>] [--epochs=<int>]
[--learning-rate=<float>] [--seed=<int>]
[--output-dir=<path>] [--use-gpu] [--verbose ...]
<configuration> A configuration file, defining the dataset and the network
-h, --help Shows this help message and exits
--model=<string> Filename of the model to load (if any).
--batch-size=<int> Batch size [default: 64]
--epochs=<int> Number of training epochs [default: 20]
--learning-rate=<float> Learning rate [default: 0.01]
-S, --seed=<int> The random seed [default: 3]
-o, --output-dir=<path> Dir to save stuff [default: training]
-g, --use-gpu Use the GPU
-v, --verbose Increase the verbosity (may appear multiple times).
Note that arguments provided directly by command-line will override the ones in the configuration file.
To run the training process
$ %(prog)s
See '%(prog)s --help' for more information.
import os, sys
import pkg_resources
import torch
import numpy
from docopt import docopt
import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")
from bob.extension.config import load
from bob.learn.pytorch.trainers import MCCNNTrainer
from bob.learn.pytorch.utils import get_parameter
version = pkg_resources.require('bob.learn.pytorch')[0].version
def main(user_input=None):
    """Train a MCCNN for multi-channel face PAD.

    Parses the docopt command-line (or ``user_input``), loads the
    configuration file, builds the dataloader and launches ``MCCNNTrainer``.
    Arguments given on the command-line override the configuration file.

    Parameters
    ----------
    user_input : list of str or None
        Command-line arguments to use instead of ``sys.argv[1:]``
        (useful for testing).
    """
    # Parse the command-line arguments
    if user_input is not None:
        arguments = user_input
    else:
        arguments = sys.argv[1:]

    prog = os.path.basename(sys.argv[0])
    completions = dict(prog=prog, version=version,)
    args = docopt(__doc__ % completions, argv=arguments, version='Train a MCCNN (%s)' % version,)

    # load configuration file
    configuration = load([os.path.join(args['<configuration>'])])

    # get the pre-trained model file, if any; the config file wins over the
    # command-line here
    model = args['--model']
    if hasattr(configuration, 'model'):
        model = configuration.model

    # get various parameters, either from config file or command-line
    batch_size = get_parameter(args, configuration, 'batch_size', 64)
    epochs = get_parameter(args, configuration, 'epochs', 20)
    learning_rate = get_parameter(args, configuration, 'learning_rate', 0.01)
    seed = get_parameter(args, configuration, 'seed', 3)
    output_dir = get_parameter(args, configuration, 'output_dir', 'training')
    use_gpu = get_parameter(args, configuration, 'use_gpu', False)
    verbosity_level = get_parameter(args, configuration, 'verbose', 0)
    bob.core.log.set_verbosity_level(logger, verbosity_level)

    # print parameters
    logger.debug("Model file = {}".format(model))
    logger.debug("Batch size = {}".format(batch_size))
    logger.debug("Epochs = {}".format(epochs))
    logger.debug("Learning rate = {}".format(learning_rate))
    logger.debug("Seed = {}".format(seed))
    logger.debug("Output directory = {}".format(output_dir))
    logger.debug("Use GPU = {}".format(use_gpu))

    # process on the arguments / options
    # NOTE(review): the body of this `if` was lost in extraction; seeding the
    # CUDA RNG is the usual content here -- confirm.
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
    if torch.cuda.is_available() and not use_gpu:
        logger.warn("You have a CUDA device, so you should probably run with --use-gpu")

    # get data
    if hasattr(configuration, 'dataset'):
        dataloader = torch.utils.data.DataLoader(configuration.dataset, batch_size=batch_size, shuffle=True)
        logger.info("There are {} training samples".format(len(configuration.dataset)))
    else:
        logger.error("Please provide a dataset in your configuration file !")
        sys.exit()

    # train the network
    if hasattr(configuration, 'network'):
        trainer = MCCNNTrainer(configuration.network,
                               batch_size=batch_size,
                               use_gpu=use_gpu,
                               adapted_layers='conv1-block1-group1',
                               adapt_reference_channel=False,
                               verbosity_level=verbosity_level)
        trainer.train(dataloader, n_epochs=epochs, learning_rate=learning_rate, output_dir=output_dir, model=model)
    else:
        logger.error("Please provide a network in your configuration file !")
        sys.exit()
......@@ -183,7 +183,32 @@ def test_CNNtrainer():
class DummyDataSetMCCNN(Dataset):
    """Random 4-channel dataset used to smoke-test the MCCNN trainer.

    Yields 100 samples of shape ``(4, 128, 128)`` (float32) with a random
    binary label, mimicking a multi-channel PAD dataset.
    """

    def __init__(self):
        pass

    def __len__(self):
        # fixed, arbitrary size for the dummy dataset
        return 100

    def __getitem__(self, idx):
        data = numpy.random.rand(4, 128, 128).astype("float32")
        label = numpy.random.randint(2)
        sample = data, label
        return sample
def test_MCCNNtrainer():
    """Train the MCCNN for one epoch on random data and check that a
    checkpoint file is written by the trainer."""
    from ..architectures import MCCNN

    net = MCCNN(num_channels=4)

    # FIX(review): the dataset argument was missing from the DataLoader call
    dataloader = torch.utils.data.DataLoader(DummyDataSetMCCNN(), batch_size=32, shuffle=True)

    from ..trainers import MCCNNTrainer
    trainer = MCCNNTrainer(net, verbosity_level=3)

    trainer.train(dataloader, n_epochs=1, output_dir='.')

    import os
    assert os.path.isfile('model_1_0.pth')
    # clean up the artifact so repeated test runs start fresh
    os.remove('model_1_0.pth')
class DummyDataSetGAN(Dataset):
def __init__(self):
#!/usr/bin/env python
# encoding: utf-8
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")
import time
import os
#0. Add class balancing
#1. Logging to tensorboardX or a simpler logger
#2. Support for Validation set and validation loss
#3. Use to(device) instead of .cuda()?
#4. Functionality to select channels from the dataloader: may be move this to the datafolder class
#5. Moving more arguments to config?
def comp_bce_loss_weights(target):
    """Compute the balancing weights for the BCE loss function.

    Each positive sample (label 1) is weighted by the frequency of the
    negative class in the batch and vice versa, so that both classes
    contribute equally to the loss even when the batch is skewed.

    Parameters
    ----------
    target : Tensor
        Tensor containing class labels (0/1) for each sample.
        Tensor of the size: ``[num_patches]``

    Returns
    -------
    weights : Tensor
        A tensor containing the weights for each sample.
    """
    # work on a float copy so the fractional weights are not truncated when
    # the labels arrive as an integer tensor
    weights = np.copy(target.cpu().numpy()).astype("float64")

    pos_weight = 1 - np.sum(weights) / len(weights)
    neg_weight = 1 - pos_weight

    # FIX(review): these two assignments were lost in this copy of the file;
    # without them the function returned the raw labels instead of weights.
    weights[weights == 1] = pos_weight
    weights[weights == 0] = neg_weight

    # weights = weights/np.sum(weights)

    weights = torch.Tensor(weights)

    return weights
class MCCNNTrainer(object):
    """
    Class to train the MCCNN

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    batch_size: int
        The size of your minibatch
    use_gpu: bool
        If you would like to use the gpu
    verbosity_level: int
        The level of verbosity output to stdout
    """

    def __init__(self, network, batch_size=64, use_gpu=False,
                 adapted_layers='conv1-block1-group1',
                 adapt_reference_channel=False, verbosity_level=2):
        """Init function

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        batch_size: int
            The size of your minibatch
        use_gpu: bool
            If you would like to use the gpu
        adapted_layers: str
            The blocks in the CNN to adapt; only the ones listed are adapted
            in the training. The layers are separated by '-' in the string,
            for example 'conv1-block1-group1'. The fully connected layers in
            the output part are adapted always.
        adapt_reference_channel: bool
            If this value is `True` then 'ch_0' (which is the reference
            channel - usually, grayscale image) is also adapted. Otherwise the
            reference channel is not adapted, so that it can be used for face
            recognition as well, default: `False`.
        verbosity_level: int
            The level of verbosity output to stdout
        """
        self.network = network
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        # binary real/attack classification; the network is expected to
        # output probabilities in [0, 1] (BCELoss, not BCEWithLogitsLoss)
        self.criterion = nn.BCELoss()

        if self.use_gpu:
            self.network.cuda()

        bob.core.log.set_verbosity_level(logger, verbosity_level)

        # NOTE(review): the right-hand side of this line was truncated in
        # this copy; assumes the MCCNN architecture exposes the names of its
        # layer groups as `lcnn_layers` -- confirm against the architecture.
        layers_present = self.network.lcnn_layers

        # select the layers in the network to adapt
        adapted_layers_list = adapted_layers.split('-')

        assert set(adapted_layers_list) <= set(layers_present)  # to ensure layer names are valid

        if adapt_reference_channel:  # whether to adapt the reference channel
            start_index = 0
        else:
            start_index = 1

        layers_to_adapt = ['linear1fc', 'linear2fc']  # Final fully connected layers are added in all cases

        # listing the layers which would be adapted
        # NOTE(review): the per-channel naming scheme 'ch_<i>_<layer>' is
        # reconstructed -- confirm it matches the MCCNN parameter names.
        for i in range(start_index, self.network.num_channels):
            for layer in adapted_layers_list:
                layers_to_adapt.append('ch_{}_'.format(i) + layer)

        layers_to_adapt = list(np.unique(layers_to_adapt))

        logger.info("Listing the layers which would be adapted:")

        # freeze everything first, then unfreeze only the selected layers;
        # substring match against the parameter's qualified name
        for name, param in self.network.named_parameters():
            param.requires_grad = False
            for lta in layers_to_adapt:
                if lta in name:
                    param.requires_grad = True
                    logger.info(name)

    def load_model(self, model_filename):
        """Loads an existing model

        Parameters
        ----------
        model_filename: str
            The filename of the model to load

        Returns
        -------
        start_epoch: int
            The epoch to start with
        start_iteration: int
            The iteration to start with
        losses: list(float)
            The list of losses from previous training
        """
        cp = torch.load(model_filename)
        self.network.load_state_dict(cp['state_dict'])
        start_epoch = cp['epoch']
        start_iter = cp['iteration']
        losses = cp['loss']
        return start_epoch, start_iter, losses

    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training
        """
        # make sure the destination directory exists
        os.makedirs(output_dir, exist_ok=True)

        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info('Saving model to {}'.format(saved_path))

        # checkpoint on CPU so it can be reloaded on machines without a GPU
        cp = {'epoch': epoch,
              'iteration': iteration,
              'loss': losses,
              'state_dict': self.network.cpu().state_dict(),
              }
        torch.save(cp, saved_path)

        # move the model back to GPU if needed
        if self.use_gpu:
            self.network.cuda()

    def train(self, dataloader, n_epochs=25, learning_rate=1e-4, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: :py:class:`torch.utils.data.DataLoader`
            The dataloader for your data
        n_epochs: int
            The number of epochs you would like to train for
        learning_rate: float
            The learning rate for Adam optimizer.
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pretrained model file to start training from; this
            is the PAD model; not the LightCNN model
        """
        logger.debug("EPOCHS to train : {}".format(n_epochs))
        logger.debug("Learning rate : {}".format(learning_rate))

        # if model exists, load it and resume; otherwise train from scratch
        if model is not None:
            start_epoch, start_iter, losses = self.load_model(model)
            logger.info('Starting training at epoch {}, iteration {} - last loss value is {}'.format(start_epoch, start_iter, losses[-1]))
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        # debug print the layers where grad is true
        for name, param in self.network.named_parameters():
            if param.requires_grad:
                logger.debug('Layer to be adapted from grad check : {}'.format(name))

        # setup optimizer: only optimize the parameters left trainable by
        # the layer-selection done in __init__
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.network.parameters()), lr=learning_rate)

        # let's go
        for epoch in range(start_epoch, n_epochs):
            for i, data in enumerate(dataloader, 0):
                if i >= start_iter:
                    start = time.time()

                    img, labels = data
                    batch_size = len(img)

                    # NOTE(review): reconstructed -- BCELoss needs float
                    # targets of the same shape as the network output.
                    labels = labels.float().unsqueeze(1)

                    if self.use_gpu:
                        img = img.cuda()
                        labels = labels.cuda()

                    imagesv = Variable(img)
                    labelsv = Variable(labels)

                    # NOTE(review): the forward/backward/step lines were lost
                    # in this copy and are reconstructed here -- confirm.
                    output = self.network(imagesv)
                    loss = self.criterion(output, labelsv)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    losses.append(loss.item())
                    end = time.time()
                    logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {})".format(epoch, n_epochs, i, len(dataloader), loss.item(), (end-start)))

            # do stuff - like saving models
            logger.info("EPOCH {} DONE".format(epoch+1))
            self.save_model(output_dir, epoch=(epoch+1), iteration=0, losses=losses)
from .CNNTrainer import CNNTrainer
from .MCCNNTrainer import MCCNNTrainer
from .DCGANTrainer import DCGANTrainer
from .ConditionalGANTrainer import ConditionalGANTrainer
......@@ -70,6 +70,7 @@ setup(
# scripts should be declared using this entry:
'console_scripts' : [
' = bob.learn.pytorch.scripts.train_cnn:main',
' = bob.learn.pytorch.scripts.train_mccnn:main',
' = bob.learn.pytorch.scripts.train_dcgan:main',
' = bob.learn.pytorch.scripts.train_conditionalgan:main',
' = bob.learn.pytorch.scripts.train_network:main',
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment