Commit dd580d02 authored by Anjith GEORGE

cleanup

parent e60850f3
Pipeline #29857 passed with stage in 65 minutes and 43 seconds
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
@author: Olegs Nikisins
"""
# ==============================================================================
# Import what is needed here:
import torch.utils.data as data

import os

import random
random.seed(a=7)

import numpy as np

from torchvision import transforms

import h5py
# ==============================================================================
def get_file_names_and_labels(files, data_folder, extension=".hdf5", hldi_type="pad"):
"""
Get absolute names of the corresponding file objects and their class labels,
as well as keys defining name of the frame to load the data from.
@@ -70,14 +67,14 @@ def get_file_names_and_labels(files, data_folder, extension = ".hdf5", hldi_type
        file_name = os.path.join(data_folder, f.path + extension)

        if os.path.isfile(file_name):  # if the file is available:

            with h5py.File(file_name, "r") as f_h5py:

                file_keys = list(f_h5py.keys())

                # removes the 'FrameIndexes' key
                file_keys = [k for k in file_keys if k != 'FrameIndexes']

                # elements of the tuples in the below list are as follows:
                # a filename a key is extracted from,
@@ -89,7 +86,7 @@ def get_file_names_and_labels(files, data_folder, extension = ".hdf5", hldi_type
    return file_names_labels_keys
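# A minimal usage sketch (hypothetical paths and a hypothetical Bob PAD HLDI
# instance ``db``; each returned element is assumed, from the variable name,
# to be a (file_name, label, key) tuple):
#
#   files = db.objects(groups=['train'], protocol='grandtest')
#   file_names_labels_keys = get_file_names_and_labels(
#       files=files, data_folder='/path/to/preprocessed', hldi_type="pad")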
# ==============================================================================
class DataFolderGeneric(data.Dataset):
"""
A generic data loader compatible with Bob High Level Database Interfaces
@@ -155,14 +152,14 @@ class DataFolderGeneric(data.Dataset):
"""
def __init__(self, data_folder,
transform = None,
extension = '.hdf5',
bob_hldi_instance = None,
hldi_type = "pad",
groups = ['train', 'dev', 'eval'],
protocol = 'grandtest',
transform=None,
extension='.hdf5',
bob_hldi_instance=None,
hldi_type="pad",
groups=['train', 'dev', 'eval'],
protocol='grandtest',
purposes=['real', 'attack'],
allow_missing_files = True,custom_func=None,
allow_missing_files=True, custom_func=None,
**kwargs):
"""
Attributes
@@ -220,19 +217,20 @@ class DataFolderGeneric(data.Dataset):
        if bob_hldi_instance is not None:

            files = bob_hldi_instance.objects(groups=self.groups,
                                              protocol=self.protocol,
                                              purposes=self.purposes,
                                              **kwargs)

            file_names_labels_keys = get_file_names_and_labels(files=files,
                                                               data_folder=self.data_folder,
                                                               extension=self.extension,
                                                               hldi_type=self.hldi_type)

            if self.allow_missing_files:  # return only existing files
                file_names_labels_keys = [
                    f for f in file_names_labels_keys if os.path.isfile(f[0])]

        else:
@@ -241,8 +239,8 @@
        self.file_names_labels_keys = file_names_labels_keys
    # ==========================================================================
    def __getitem__(self, index):
        """
        Returns a **transformed** sample/image and a target class, given index.
@@ -269,41 +267,44 @@ class DataFolderGeneric(data.Dataset):
        with h5py.File(path, "r") as f_h5py:

            # The size now is (3 x H x W)
            img_array = np.array(f_h5py.get(key + '/array'))

        # if an instance of torchvision composed transformation
        if isinstance(self.transform, transforms.Compose):

            if len(img_array.shape) == 3:  # for color or multi-channel images

                img_array_tr = np.swapaxes(img_array, 1, 2)
                img_array_tr = np.swapaxes(img_array_tr, 0, 2)

                np_img = img_array_tr.copy()  # np_img is numpy.ndarray of shape HxWxC

            else:  # for gray-scale images
                # np_img is numpy.ndarray of size HxWx1
                np_img = np.expand_dims(img_array, 2)

            if self.transform is not None:
                # after this transformation np_img should be a tensor
                np_img = self.transform(np_img)

        else:  # if a custom transformation function is given

            img_array_transformed = self.transform(img_array)

            return img_array_transformed, target
            # NOTE: make sure ``img_array_transformed`` is converted to a Tensor
            # in your custom ``transform`` function.

        if self.custom_func is not None:  # custom function to change the return to something else
            return self.custom_func(np_img, target)

        return np_img, target
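    # Shape sanity check for the swapaxes chain above (a quick sketch with a
    # dummy array, not part of the dataset logic):
    #
    #   >>> a = np.zeros((3, 10, 20))                         # C x H x W
    #   >>> np.swapaxes(np.swapaxes(a, 1, 2), 0, 2).shape
    #   (10, 20, 3)                                           # H x W x C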
    # ==========================================================================
    def __len__(self):
        """
        Returns
@@ -313,4 +314,3 @@ class DataFolderGeneric(data.Dataset):
            The length of the file list.
        """
        return len(self.file_names_labels_keys)
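# A minimal sketch of how this dataset might be wired to a PyTorch DataLoader
# (hypothetical names; assumes a Bob PAD HLDI instance ``db`` and preprocessed
# .hdf5 files under ``data_folder``):
#
#   from torch.utils.data import DataLoader
#   dataset = DataFolderGeneric(data_folder='/path/to/preprocessed',
#                               transform=transforms.Compose([transforms.ToTensor()]),
#                               bob_hldi_instance=db,
#                               groups=['train'])
#   dataloader = DataLoader(dataset, batch_size=32, shuffle=True)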
#!/usr/bin/env python
# encoding: utf-8
import copy
import os
import time

import numpy as np

import torch
import torch.nn as nn
from torch.autograd import Variable

from .tflog import Logger

import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")
class GenericTrainer(object):
"""
Class to train a generic NN; all the parameters are provided in configs
Attributes
----------
network: :py:class:`torch.nn.Module`
The network to train
optimizer: :py:class:`torch.optim.Optimizer`
Optimizer object to be used. Initialized in the config file.
device: str
Device which will be used for training the model
verbosity_level: int
The level of verbosity output to stdout
"""
def __init__(self, network, optimizer, compute_loss, learning_rate=0.0001, device='cpu', verbosity_level=2, tf_logdir='tf_logs',do_crossvalidation=False, save_interval=5):
""" Init function . The layers to be adapted in the network is selected and the gradients are set to `True`
for the layers which needs to be adapted.
Parameters
----------
network: :py:class:`torch.nn.Module`
The network to train
device: str
Device which will be used for training the model
verbosity_level: int
The level of verbosity output to stdout
do_crossvalidation: bool
If set to `True`, performs validation in each epoch and stores the best model based on validation loss.
"""
self.network = network
self.optimizer=optimizer
self.compute_loss=compute_loss
self.device = device
self.learning_rate=learning_rate
self.save_interval=save_interval
self.do_crossvalidation=do_crossvalidation
if self.do_crossvalidation:
phases=['train','val']
else:
phases=['train']
self.phases=phases
# Move the network to device
self.network.to(self.device)
bob.core.log.set_verbosity_level(logger, verbosity_level)
self.tf_logger = Logger(tf_logdir)
# Setting the gradients to true for the layers which needs to be adapted
def load_model(self, model_filename):
"""Loads an existing model
Parameters
----------
model_file: str
The filename of the model to load
Returns
-------
start_epoch: int
The epoch to start with
start_iteration: int
The iteration to start with
losses: list(float)
The list of losses from previous training
"""
cp = torch.load(model_filename)
self.network.load_state_dict(cp['state_dict'])
start_epoch = cp['epoch']
start_iter = cp['iteration']
losses = cp['loss']
return start_epoch, start_iter, losses
def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
"""Save the trained network
Parameters
----------
output_dir: str
The directory to write the models to
epoch: int
the current epoch
iteration: int
the current (last) iteration
losses: list(float)
The list of losses since the beginning of training
"""
saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
saved_path = os.path.join(output_dir, saved_filename)
logger.info('Saving model to {}'.format(saved_path))
cp = {'epoch': epoch,
'iteration': iteration,
'loss': losses,
'state_dict': self.network.cpu().state_dict()
}
torch.save(cp, saved_path)
self.network.to(self.device)
"""
Class to train a generic NN; all the parameters are provided in configs
Attributes
----------
network: :py:class:`torch.nn.Module`
The network to train
optimizer: :py:class:`torch.optim.Optimizer`
Optimizer object to be used. Initialized in the config file.
device: str
Device which will be used for training the model
verbosity_level: int
The level of verbosity output to stdout
"""
def __init__(self, network, optimizer, compute_loss, learning_rate=0.0001, device='cpu', verbosity_level=2, tf_logdir='tf_logs', do_crossvalidation=False, save_interval=5):
""" Init function . The layers to be adapted in the network is selected and the gradients are set to `True`
for the layers which needs to be adapted.
Parameters
----------
network: :py:class:`torch.nn.Module`
The network to train
device: str
Device which will be used for training the model
verbosity_level: int
The level of verbosity output to stdout
do_crossvalidation: bool
If set to `True`, performs validation in each epoch and stores the best model based on validation loss.
"""
self.network = network
self.optimizer = optimizer
self.compute_loss = compute_loss
self.device = device
self.learning_rate = learning_rate
self.save_interval = save_interval
self.do_crossvalidation = do_crossvalidation
if self.do_crossvalidation:
phases = ['train', 'val']
else:
phases = ['train']
self.phases = phases
# Move the network to device
self.network.to(self.device)
bob.core.log.set_verbosity_level(logger, verbosity_level)
self.tf_logger = Logger(tf_logdir)
# Setting the gradients to true for the layers which needs to be adapted
def load_model(self, model_filename):
"""Loads an existing model
Parameters
----------
model_file: str
The filename of the model to load
Returns
-------
start_epoch: int
The epoch to start with
start_iteration: int
The iteration to start with
losses: list(float)
The list of losses from previous training
"""
cp = torch.load(model_filename)
self.network.load_state_dict(cp['state_dict'])
start_epoch = cp['epoch']
start_iter = cp['iteration']
losses = cp['loss']
return start_epoch, start_iter, losses
def save_model(self, output_dir, epoch=0, iteration=0, losses=None):
"""Save the trained network
Parameters
----------
output_dir: str
The directory to write the models to
epoch: int
the current epoch
iteration: int
the current (last) iteration
losses: list(float)
The list of losses since the beginning of training
"""
saved_filename = 'model_{}_{}.pth'.format(epoch, iteration)
saved_path = os.path.join(output_dir, saved_filename)
logger.info('Saving model to {}'.format(saved_path))
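        # NOTE: state_dict() is taken after moving the network to the CPU, so
        # the checkpoint can also be loaded on CPU-only machines; the network
        # is moved back to self.device right after saving.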
        cp = {'epoch': epoch,
              'iteration': iteration,
              'loss': losses,
              'state_dict': self.network.cpu().state_dict()
              }
        torch.save(cp, saved_path)
        self.network.to(self.device)
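    # A minimal save/resume sketch (hypothetical ``net``, ``opt`` and
    # ``my_loss``; ``my_loss(network, img, labels, device)`` must return the
    # loss tensor, matching how ``compute_loss`` is called in ``train`` below):
    #
    #   trainer = GenericTrainer(net, opt, compute_loss=my_loss, device='cpu')
    #   trainer.save_model('out', epoch=3, iteration=0, losses=[0.7, 0.5])
    #   start_epoch, start_iter, losses = trainer.load_model('out/model_3_0.pth')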
    def train(self, dataloader, n_epochs=25, output_dir='out', model=None):
        """Performs the training.

        Parameters
        ----------
        dataloader: dict
            A dictionary of :py:class:`torch.utils.data.DataLoader` objects for
            your data, keyed by phase ('train' and, optionally, 'val')
        n_epochs: int
            The number of epochs you would like to train for
        output_dir: str
            The directory where you would like to save models
        model: str
            The path to a pretrained model file to start training from; this is
            the PAD model, not the LightCNN model
        """
        # if model exists, load it
        if model is not None:
            start_epoch, start_iter, losses = self.load_model(model)
            logger.info('Starting training at epoch {}, iteration {} - last loss value is {}'.format(
                start_epoch, start_iter, losses[-1]))
        else:
            start_epoch = 0
            start_iter = 0
            losses = []
            logger.info('Starting training from scratch')

        for name, param in self.network.named_parameters():
            if param.requires_grad:
                logger.info(
                    'Layer to be adapted from grad check : {}'.format(name))
        # setup optimizer
        self.network.train(True)

        best_model_wts = copy.deepcopy(self.network.state_dict())
        best_loss = float("inf")

        # let's go
        for epoch in range(start_epoch, n_epochs):
            # in the epoch
            train_loss_history = []
            val_loss_history = []
            for phase in self.phases:
                if phase == 'train':
                    self.network.train()  # Set model to training mode
                else:
                    self.network.eval()  # Set model to evaluate mode

                for i, data in enumerate(dataloader[phase], 0):
                    if i >= start_iter:
                        start = time.time()
                        # get data from dataset
                        img, labels = data
                        self.optimizer.zero_grad()
                        with torch.set_grad_enabled(phase == 'train'):
                            loss = self.compute_loss(
                                self.network, img, labels, self.device)
                            if phase == 'train':
                                loss.backward()
                                self.optimizer.step()
                                train_loss_history.append(loss.item())
                            else:
                                val_loss_history.append(loss.item())
                        end = time.time()
                        logger.info("[{}/{}][{}/{}] => Loss = {} (time spent: {}), Phase {}".format(
                            epoch, n_epochs, i, len(dataloader[phase]), loss.item(), (end - start), phase))
                        losses.append(loss.item())

            epoch_train_loss = np.mean(train_loss_history)
            logger.info("Train Loss : {} epoch : {}".format(
                epoch_train_loss, epoch))

            if self.do_crossvalidation:
                epoch_val_loss = np.mean(val_loss_history)
                logger.info("Val Loss : {} epoch : {}".format(
                    epoch_val_loss, epoch))
                if phase == 'val' and epoch_val_loss < best_loss: