Commit 38a5586f authored by Anjith GEORGE's avatar Anjith GEORGE

Added trainer and trainer script

parent c2a2f4b8
from torchvision import transforms
from bob.learn.pytorch.architectures import MCCNN
from bob.learn.pytorch.datasets import DataFolder
from bob.pad.face.database import BatlPadDatabase
#==============================================================================
# Load the dataset
""" The steps in initializing the dataset is as follows
1. Initialize a databae instance, with the protocol, groups and number of frames
(currently for the ones in 'bob.pad.face', and point 'data_folder_train' to the preprocessed directory )
Note: Here we assume that we have already preprocessed the with `spoof.py` script and dumped it to location
pointed to by 'data_folder_train'.
2. Specify the transform to be used on the images. It can be instances of `torchvision.transforms.Compose` or custom functions.
3. Initialize the `data_folder` class with the database instance and all other parameters. This dataset instance is used in
the trainer class
4. Initialize the network architecture with required arguments.
"""
data_folder_train = '/idiap/temp/ageorge/WMCA/preprocessed/'
frames = 50
extension = '.h5'
train_groups = ['train']  # only the 'train' group is used for training the network
protocols = "grandtest-color*depth*infrared*thermal-{}".format(frames)  # makeup is excluded anyway here
exclude_attacks_list = ["makeup"]

img_transform_train = transforms.Compose([transforms.ToPILImage(),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor()])  # Add p=0.5 later

bob_hldi_instance_train = BatlPadDatabase(
    protocol=protocols,
    original_directory=data_folder_train,
    original_extension=extension,
    landmark_detect_method="mtcnn",  # detect annotations using mtcnn
    exclude_attacks_list=exclude_attacks_list,
    exclude_pai_all_sets=True,  # exclude makeup from all the sets, which is the default behavior for the grandtest protocol
    append_color_face_roi_annot=False)

dataset = DataFolder(data_folder=data_folder_train,
                     transform=img_transform_train,
                     extension='.hdf5',
                     bob_hldi_instance=bob_hldi_instance_train,
                     groups=train_groups,
                     protocol=protocols,
                     purposes=['real', 'attack'],
                     allow_missing_files=True)
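
# Optional sanity check (a minimal sketch, not part of the original config): `DataFolder`
# is used below as a `torch.utils.data.Dataset`, so indexing it is assumed to yield one
# (multi-channel image tensor, label) pair. Uncomment to verify the preprocessed data is
# found and has the expected 4-channel shape before launching a full training run:
#
#   img, label = dataset[0]
#   print(img.shape, label)  # expected something like: torch.Size([4, 128, 128]) and 0 or 1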
#==============================================================================
# Load the architecture
network = MCCNN(num_channels=4)
#==============================================================================
# Specify other training parameters
batch_size = 64
epochs = 25
learning_rate = 0.0001
seed = 3
output_dir = 'training_mccn'
use_gpu = False
verbose = 2
#==============================================================================
"""
Note: Running in GPU
jman submit --queue gpu \
--name mccnn \
--log-dir /idiap/user/ageorge/WORK/COMMON_ENV_PAD_BATL_DB/training_mccn/logs/ \
--environment="PYTHONUNBUFFERED=1" -- \
./bin/train_mccnn.py \
/idiap/user/ageorge/WORK/COMMON_ENV_PAD_BATL_DB/src/bob.learn.pytorch/bob/learn/pytorch/config/mccnn/wmca_mccnn.py --use-gpu -vvv
Note: Running in cpu
./bin/train_mccnn.py \
/idiap/user/ageorge/WORK/COMMON_ENV_PAD_BATL_DB/src/bob.learn.pytorch/bob/learn/pytorch/config/mccnn/wmca_mccnn.py -vvv
"""
#!/usr/bin/env python
# encoding: utf-8
""" Train a MCCNN for muti-channel face PAD
Usage:
%(prog)s <configuration>
[--model=<string>] [--batch-size=<int>] [--epochs=<int>]
[--learning-rate=<float>] [--seed=<int>]
[--output-dir=<path>] [--use-gpu] [--verbose ...]
Arguments:
<configuration> A configuration file, defining the dataset and the network
Options:
-h, --help Shows this help message and exits
--model=<string> Filename of the model to load (if any).
--batch-size=<int> Batch size [default: 64]
--epochs=<int> Number of training epochs [default: 20]
--learning-rate=<float> Learning rate [default: 0.01]
-S, --seed=<int> The random seed [default: 3]
-o, --output-dir=<path> Dir to save stuff [default: training]
-g, --use-gpu Use the GPU
-v, --verbose Increase the verbosity (may appear multiple times).
Note that arguments provided directly by command-line will override the ones in the configuration file.
Example:
To run the training process
$ %(prog)s config.py
See '%(prog)s --help' for more information.
"""
import os, sys
import pkg_resources

import torch
import numpy
from docopt import docopt

import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")

import bob.io.base  # needed for create_directories_safe() below

from bob.extension.config import load
from bob.learn.pytorch.trainers import MCCNNTrainer
from bob.learn.pytorch.utils import get_parameter

version = pkg_resources.require('bob.learn.pytorch')[0].version
def main(user_input=None):

    # Parse the command-line arguments
    if user_input is not None:
        arguments = user_input
    else:
        arguments = sys.argv[1:]

    prog = os.path.basename(sys.argv[0])
    completions = dict(prog=prog, version=version,)
    args = docopt(__doc__ % completions, argv=arguments, version='Train an MCCNN (%s)' % version,)

    # load configuration file
    configuration = load([os.path.join(args['<configuration>'])])

    # get the pre-trained model file, if any
    model = args['--model']
    if hasattr(configuration, 'model'):
        model = configuration.model

    # get various parameters, either from config file or command-line
    batch_size = get_parameter(args, configuration, 'batch_size', 64)
    epochs = get_parameter(args, configuration, 'epochs', 20)
    learning_rate = get_parameter(args, configuration, 'learning_rate', 0.01)
    seed = get_parameter(args, configuration, 'seed', 3)
    output_dir = get_parameter(args, configuration, 'output_dir', 'training')
    use_gpu = get_parameter(args, configuration, 'use_gpu', False)
    verbosity_level = get_parameter(args, configuration, 'verbose', 0)

    bob.core.log.set_verbosity_level(logger, verbosity_level)
    bob.io.base.create_directories_safe(output_dir)

    # print parameters
    logger.debug("Model file = {}".format(model))
    logger.debug("Batch size = {}".format(batch_size))
    logger.debug("Epochs = {}".format(epochs))
    logger.debug("Learning rate = {}".format(learning_rate))
    logger.debug("Seed = {}".format(seed))
    logger.debug("Output directory = {}".format(output_dir))
    logger.debug("Use GPU = {}".format(use_gpu))

    # process on the arguments / options
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
    if torch.cuda.is_available() and not use_gpu:
        logger.warn("You have a CUDA device, so you should probably run with --use-gpu")

    # get data
    if hasattr(configuration, 'dataset'):
        dataloader = torch.utils.data.DataLoader(configuration.dataset, batch_size=batch_size, shuffle=True)
        logger.info("There are {} training samples".format(len(configuration.dataset)))
    else:
        logger.error("Please provide a dataset in your configuration file!")
        sys.exit()

    # train the network
    if hasattr(configuration, 'network'):
        trainer = MCCNNTrainer(configuration.network, batch_size=batch_size, use_gpu=use_gpu, adapted_layers='conv1-block1-group1', adapt_reference_channel=False, verbosity_level=verbosity_level)
        trainer.train(dataloader, n_epochs=epochs, learning_rate=learning_rate, output_dir=output_dir, model=model)
    else:
        logger.error("Please provide a network in your configuration file!")
        sys.exit()
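
# Not in the original snippet: a standard entry-point guard so the module can also be
# executed directly; the 'train_mccnn.py' console script registered in setup.py calls
# main() on its own.
if __name__ == '__main__':
    main()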
@@ -183,7 +183,32 @@ def test_CNNtrainer():
    os.remove('model_1_0.pth')
class DummyDataSetMCCNN(Dataset):
    def __init__(self):
        pass

    def __len__(self):
        return 100

    def __getitem__(self, idx):
        data = numpy.random.rand(4, 128, 128).astype("float32")
        label = numpy.random.randint(2)
        sample = data, label
        return sample


def test_MCCNNtrainer():
    from ..architectures import MCCNN
    net = MCCNN(num_channels=4)
    dataloader = torch.utils.data.DataLoader(DummyDataSetMCCNN(), batch_size=32, shuffle=True)
    from ..trainers import MCCNNTrainer
    trainer = MCCNNTrainer(net, verbosity_level=3)
    trainer.train(dataloader, n_epochs=1, output_dir='.')
    import os
    assert os.path.isfile('model_1_0.pth')
    os.remove('model_1_0.pth')
class DummyDataSetGAN(Dataset):
    def __init__(self):
        pass
...
#!/usr/bin/env python
# encoding: utf-8
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")
import time
import os
# TODO:
# 0. Add class balancing
# 1. Logging to tensorboardX or a simpler logger
# 2. Support for a validation set and validation loss
# 3. Use to(device) instead of .cuda()?
# 4. Functionality to select channels from the dataloader; maybe move this to the DataFolder class
# 5. Moving more arguments to config?
def comp_bce_loss_weights(target):
    """
    Compute the balancing weights for the BCE loss function.

    Arguments
    ---------
    target : Tensor
        Tensor containing the class label for each sample.
        Tensor of size: ``[num_patches]``

    Returns
    -------
    weights : Tensor
        A tensor containing the weights for each sample.
    """
    weights = np.copy(target.cpu().numpy())
    pos_weight = 1 - np.sum(weights) / len(weights)
    neg_weight = 1 - pos_weight

    weights[weights == 1] = pos_weight
    weights[weights == 0] = neg_weight

    # weights = weights/np.sum(weights)

    weights = torch.Tensor(weights)

    return weights
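
# Worked example (illustrative, not from the original source): for a minibatch with
# labels [1, 0, 0, 0], pos_weight = 1 - 1/4 = 0.75 and neg_weight = 0.25, so the single
# positive sample is weighted 0.75 and each negative one 0.25; the rarer class gets the
# larger weight:
#
#   >>> comp_bce_loss_weights(torch.Tensor([1., 0., 0., 0.]))
#   tensor([0.7500, 0.2500, 0.2500, 0.2500])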
class MCCNNTrainer(object):
    """
    Class to train the MCCNN

    Attributes
    ----------
    network: :py:class:`torch.nn.Module`
        The network to train
    batch_size: int
        The size of your minibatch
    use_gpu: boolean
        If you would like to use the gpu
    verbosity_level: int
        The level of verbosity output to stdout
    """

    def __init__(self, network, batch_size=64, use_gpu=False, adapted_layers='conv1-block1-group1', adapt_reference_channel=False, verbosity_level=2):
        """ Init function

        Parameters
        ----------
        network: :py:class:`torch.nn.Module`
            The network to train
        batch_size: int
            The size of your minibatch
        use_gpu: boolean
            If you would like to use the gpu
        adapted_layers: str
            The blocks in the CNN to adapt; only the ones listed are adapted during training. The layer names are separated by '-' in the
            string, for example 'conv1-block1-group1'. The fully connected layers at the output are always adapted.
        adapt_reference_channel: bool
            If `True`, 'ch_0' (the reference channel, usually the grayscale image) is also adapted. Otherwise the reference channel
            is not adapted, so that it can be used for face recognition as well, default: `False`.
        verbosity_level: int
            The level of verbosity output to stdout
        """
        self.network = network
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.criterion = nn.BCELoss()

        # if selected_channel_indexes is not None and isinstance(selected_channel_indexes, list):
        #     assert(len(selected_channel_indexes) == self.network.num_channels)
        #     self.sel_ind = selected_channel_indexes
        # else:  # default is to select all channels
        #     self.sel_ind = [i for i in range(self.network.num_channels)]

        if self.use_gpu:
            self.network.cuda()

        bob.core.log.set_verbosity_level(logger, verbosity_level)

        layers_present = self.network.lcnn_layers

        # select the layers in the network to adapt
        adapted_layers_list = adapted_layers.split('-')

        assert(set(adapted_layers_list) <= set(layers_present))  # to ensure the layer names are valid

        if adapt_reference_channel:  # whether to adapt the reference channel
            start_index = 0
        else:
            start_index = 1

        layers_to_adapt = ['linear1fc', 'linear2fc']  # the final fully connected layers are added in all cases

        # listing the layers which will be adapted
        for i in range(start_index, self.network.num_channels):
            for layer in adapted_layers_list:
                layers_to_adapt.append("layer_dict.ch_{}_".format(i) + layer)

        layers_to_adapt = list(np.unique(layers_to_adapt))

        # freeze everything, then re-enable gradients only for the selected layers
        # logger.info("Listing the layers which would be adapted:")
        for name, param in self.network.named_parameters():
            param.requires_grad = False
            for lta in layers_to_adapt:
                if lta in name:
                    # logger.info(name)
                    param.requires_grad = True
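
    # Illustrative trace (not part of the original code): with num_channels=4,
    # adapt_reference_channel=False and adapted_layers='conv1-block1-group1',
    # the loop above yields (after np.unique, sorted alphabetically):
    #
    #   layers_to_adapt = ['layer_dict.ch_1_block1', 'layer_dict.ch_1_conv1',
    #                      'layer_dict.ch_1_group1', 'layer_dict.ch_2_block1',
    #                      'layer_dict.ch_2_conv1', 'layer_dict.ch_2_group1',
    #                      'layer_dict.ch_3_block1', 'layer_dict.ch_3_conv1',
    #                      'layer_dict.ch_3_group1', 'linear1fc', 'linear2fc']
    #
    # i.e. channels 1-3 are adapted while ch_0, the reference channel, stays frozen.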
    def load_model(self, model_filename):
        """Loads an existing model

        Parameters
        ----------
        model_filename: str
            The filename of the model to load

        Returns
        -------
        start_epoch: int
            The epoch to start with
        start_iteration: int
            The iteration to start with
        losses: list(float)
            The list of losses from previous training
        """
        cp = torch.load(model_filename)
        self.network.load_state_dict(cp['state_dict'])
        start_epoch = cp['epoch']
        start_iter = cp['iteration']
        losses = cp['loss']
        return start_epoch, start_iter, losses
    def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
        """Save the trained network

        Parameters
        ----------
        output_dir: str
            The directory to write the models to
        epoch: int
            the current epoch
        iteration: int
            the current (last) iteration
        losses: list(float)
            The list of losses since the beginning of training
        """
        saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
        saved_path = os.path.join(output_dir, saved_filename)
        logger.info('Saving model to {}'.format(saved_path))

        cp = {'epoch': epoch,
              'iteration': iteration,
              'loss': losses,
              'state_dict': self.network.cpu().state_dict()
              }
        torch.save(cp, saved_path)

        # move the model back to the GPU if needed
        if self.use_gpu:
            self.network.cuda()
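
    # Checkpoint layout (illustrative sketch, mirroring the dict saved above): a file
    # such as 'training_mccn/model_25_0.pth' can be inspected with plain torch.load:
    #
    #   >>> cp = torch.load('training_mccn/model_25_0.pth')
    #   >>> sorted(cp.keys())
    #   ['epoch', 'iteration', 'loss', 'state_dict']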
    def train(self, dataloader, n_epochs=25, learning_rate=1e-4, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: :py:class:`torch.utils.data.DataLoader`
            The dataloader for your data
        n_epochs: int
            The number of epochs you would like to train for
        learning_rate: float
            The learning rate for the Adam optimizer.
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pretrained model file to start training from; this is the PAD model, not the LightCNN model
        """
        logger.debug("EPOCHS to train : {}".format(n_epochs))
        logger.debug("Learning rate : {}".format(learning_rate))

        # if a model exists, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_model(model)
            logger.info('Starting training at epoch {}, iteration {} - last loss value is {}'.format(start_epoch, start_iter, losses[-1]))
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        # debug: print the layers where grad is enabled
        for name, param in self.network.named_parameters():
            if param.requires_grad:
                logger.debug('Layer to be adapted from grad check : {}'.format(name))

        # setup optimizer; only the parameters with requires_grad are updated
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.network.parameters()), lr=learning_rate)

        self.network.train(True)

        # let's go
        for epoch in range(start_epoch, n_epochs):
            for i, data in enumerate(dataloader, 0):
                if i >= start_iter:
                    start = time.time()

                    img, labels = data

                    # print("img shape type", img.shape, type(img))
                    # img = imgt[:, self.sel_ind, :, :]  # subselect channels
                    # print("img shape type", img.shape, type(img))

                    labels = labels.float().unsqueeze(1)
                    weights = comp_bce_loss_weights(labels)

                    batch_size = len(img)

                    if self.use_gpu:
                        img = img.cuda()
                        labels = labels.cuda()
                        weights = weights.cuda()

                    imagesv = Variable(img)
                    labelsv = Variable(labels)

                    self.criterion.weight = weights

                    output = self.network(imagesv)
                    loss = self.criterion(output, labelsv)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    end = time.time()
                    logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {})".format(epoch, n_epochs, i, len(dataloader), loss.item(), (end - start)))
                    losses.append(loss.item())

            # do stuff - like saving models
            logger.info("EPOCH {} DONE".format(epoch + 1))
            self.save_model(output_dir, epoch=(epoch + 1), iteration=0, losses=losses)
from .CNNTrainer import CNNTrainer
from .MCCNNTrainer import MCCNNTrainer
from .DCGANTrainer import DCGANTrainer
from .ConditionalGANTrainer import ConditionalGANTrainer
...
@@ -70,6 +70,7 @@ setup(
      # scripts should be declared using this entry:
      'console_scripts': [
        'train_cnn.py = bob.learn.pytorch.scripts.train_cnn:main',
        'train_mccnn.py = bob.learn.pytorch.scripts.train_mccnn:main',
        'train_dcgan.py = bob.learn.pytorch.scripts.train_dcgan:main',
        'train_conditionalgan.py = bob.learn.pytorch.scripts.train_conditionalgan:main',
        'train_network.py = bob.learn.pytorch.scripts.train_network:main',
...