Commit 74c22c44 authored by Guillaume HEUSCH

[CondGAN] added architecture, trainer and script for Conditional GAN

parent b3319f64
#!/usr/bin/env python
# encoding: utf-8
import torch
import torch.nn as nn
def weights_init(m):
    """
    Weights initialization

    **Parameters**

    m:
        The model
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
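
# Usage sketch: `weights_init` is meant to be passed to `nn.Module.apply`, which
# calls it recursively on every submodule (this mirrors its use in the training
# script further down):
#
#     net = ConditionalGAN_generator(noise_dim=100, conditional_dim=13)
#     net.apply(weights_init)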
class ConditionalGAN_generator(nn.Module):
    """
    Class defining the Conditional GAN generator.

    **Parameters**

    noise_dim: int
        The dimension of the noise.

    conditional_dim: int
        The dimension of the conditioning variable.

    channels: int
        The number of channels in the input image (default: 3).

    ngpu: int
        The number of GPUs to use (default: 1).
    """
    def __init__(self, noise_dim, conditional_dim, channels=3, ngpu=1):
        super(ConditionalGAN_generator, self).__init__()
        self.ngpu = ngpu
        self.conditional_dim = conditional_dim

        # number of feature maps before the output layer
        ngf = 64
        self.main = nn.Sequential(
            # input is the concatenation of Z and the condition, going into a convolution
            nn.ConvTranspose2d((noise_dim + conditional_dim), ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, channels, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (channels) x 64 x 64
        )

    def forward(self, z, y):
        """
        Forward function for the generator.

        **Parameters**

        z: pyTorch Variable
            The minibatch of noise.

        y: pyTorch Variable
            The conditional one-hot encoded vector for the minibatch.
        """
        generator_input = torch.cat((z, y), 1)
        if isinstance(generator_input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, generator_input, range(self.ngpu))
        else:
            output = self.main(generator_input)
        return output
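
# Shape sketch (values are illustrative): the generator takes noise and a one-hot
# condition as 4D tensors with singleton spatial dimensions, and outputs a
# (batch, channels, 64, 64) image:
#
#     from torch.autograd import Variable
#     g = ConditionalGAN_generator(noise_dim=100, conditional_dim=13)
#     z = Variable(torch.randn(16, 100, 1, 1))   # minibatch of noise
#     y = Variable(torch.zeros(16, 13, 1, 1))    # one-hot condition
#     y[:, 0] = 1
#     images = g(z, y)                           # -> (16, 3, 64, 64)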
class ConditionalGAN_discriminator(nn.Module):
    """
    Class defining the Conditional GAN discriminator.

    **Parameters**

    conditional_dim: int
        The dimension of the conditioning variable.

    channels: int
        The number of channels in the input image (default: 3).

    ngpu: int
        The number of GPUs to use (default: 1).
    """
    def __init__(self, conditional_dim, channels=3, ngpu=1):
        super(ConditionalGAN_discriminator, self).__init__()
        self.conditional_dim = conditional_dim
        self.ngpu = ngpu

        # number of feature maps after the first layer
        ndf = 64
        self.main = nn.Sequential(
            # input is (channels + conditional_dim) x 64 x 64
            nn.Conv2d((channels + conditional_dim), ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, images, y):
        """
        Forward function for the discriminator.

        **Parameters**

        images: pyTorch Variable
            The minibatch of input images.

        y: pyTorch Variable
            The corresponding conditional feature maps.
        """
        input_discriminator = torch.cat((images, y), 1)
        if isinstance(input_discriminator.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input_discriminator, range(self.ngpu))
        else:
            output = self.main(input_discriminator)
        return output.view(-1, 1).squeeze(1)
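
# Shape sketch (values are illustrative): the condition reaches the discriminator
# as constant feature maps stacked on the image channels:
#
#     from torch.autograd import Variable
#     d = ConditionalGAN_discriminator(conditional_dim=13)
#     images = Variable(torch.randn(16, 3, 64, 64))
#     y_maps = Variable(torch.zeros(16, 13, 64, 64))
#     y_maps[:, 0, :, :] = 1                     # one "active" plane per sample
#     probs = d(images, y_maps)                  # -> (16,) values in [0, 1]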
#!/usr/bin/env python
# encoding: utf-8
""" Train a Conditional GAN
Usage:
%(prog)s [--noise-dim=<int>] [--conditional-dim=<int>]
[--batch-size=<int>] [--epochs=<int>] [--sample=<int>]
[--output-dir=<path>] [--use-gpu] [--seed=<int>] [--verbose ...]
Options:
-h, --help Show this screen.
-V, --version Show version.
-n, --noise-dim=<int> The dimension of the noise [default: 100]
-c, --conditional-dim=<int> The dimension of the conditional variable [default: 13]
-b, --batch-size=<int> The size of your mini-batch [default: 64]
-e, --epochs=<int> The number of training epochs [default: 100]
-s, --sample=<int> Save generated images at every 'sample' batch iteration [default: 100000000000]
-o, --output-dir=<path> Dir to save the logs, models and images [default: ./cgan-multipie/]
-g, --use-gpu Use the GPU
-S, --seed=<int> The random seed [default: 3]
-v, --verbose Increase the verbosity (may appear multiple times).
Example:
To run the training process
$ %(prog)s --batch-size 64 --epochs 25 --output-dir drgan
See '%(prog)s --help' for more information.
"""
import os, sys
import pkg_resources
import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")
from docopt import docopt
version = pkg_resources.require('bob.learn.pytorch')[0].version
import numpy
import bob.io.base
# torch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
# data and architecture from the package
from bob.learn.pytorch.datasets import MultiPIEDataset
from bob.learn.pytorch.datasets import RollChannels
from bob.learn.pytorch.datasets import ToTensor
from bob.learn.pytorch.datasets import Normalize
from bob.learn.pytorch.architectures import weights_init
from bob.learn.pytorch.architectures import ConditionalGAN_generator as cgenerator
from bob.learn.pytorch.architectures import ConditionalGAN_discriminator as cdiscriminator
from bob.learn.pytorch.trainers import ConditionalGANTrainer as ctrainer
def main(user_input=None):

    # Parse the command-line arguments
    if user_input is not None:
        arguments = user_input
    else:
        arguments = sys.argv[1:]

    prog = os.path.basename(sys.argv[0])
    completions = dict(prog=prog, version=version,)
    args = docopt(__doc__ % completions, argv=arguments, version='Train conditional GAN (%s)' % version,)

    # verbosity
    verbosity_level = args['--verbose']
    bob.core.log.set_verbosity_level(logger, verbosity_level)

    # get the arguments
    noise_dim = int(args['--noise-dim'])
    conditional_dim = int(args['--conditional-dim'])
    batch_size = int(args['--batch-size'])
    epochs = int(args['--epochs'])
    sample = int(args['--sample'])
    output_dir = str(args['--output-dir'])
    seed = int(args['--seed'])
    use_gpu = bool(args['--use-gpu'])

    images_dir = os.path.join(output_dir, 'samples')
    log_dir = os.path.join(output_dir, 'logs')
    model_dir = os.path.join(output_dir, 'models')

    # process on the arguments / options
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
    if torch.cuda.is_available() and not use_gpu:
        logger.warn("You have a CUDA device, so you should probably run with --use-gpu")

    bob.io.base.create_directories_safe(images_dir)
    bob.io.base.create_directories_safe(log_dir)
    bob.io.base.create_directories_safe(model_dir)

    # ============
    # === DATA ===
    # ============

    # WARNING: the transforms act on the images only; if they have to act on the
    # labels too at some point, custom transforms may be needed.
    # Also, 'ToTensor' performs a reshape from HxWxC to CxHxW.
    face_dataset = MultiPIEDataset(
        # root_dir='/Users/guillaumeheusch/work/idiap/data/multipie-cropped-64x64',
        root_dir='/idiap/temp/heusch/data/multipie-cropped-64x64',
        frontal_only=False,
        transform=transforms.Compose([
            RollChannels(),  # bob to skimage channel ordering
            ToTensor(),
            Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    )
    dataloader = torch.utils.data.DataLoader(face_dataset, batch_size=batch_size, shuffle=True)
    logger.info("There are {} training images".format(len(face_dataset)))

    # ===============
    # === NETWORK ===
    # ===============
    ngpu = 1  # usually we don't have more than one GPU
    generator = cgenerator(noise_dim, conditional_dim)
    generator.apply(weights_init)
    logger.info("Generator architecture: {}".format(generator))

    discriminator = cdiscriminator(conditional_dim)
    discriminator.apply(weights_init)
    logger.info("Discriminator architecture: {}".format(discriminator))

    # ===============
    # === TRAINER ===
    # ===============
    trainer = ctrainer(generator, discriminator, [3, 64, 64], batch_size=batch_size, noise_dim=noise_dim, conditional_dim=conditional_dim, use_gpu=use_gpu, verbosity_level=verbosity_level)
    trainer.train(dataloader, n_epochs=epochs, output_dir=output_dir)
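
# Assumed entry point for running this script directly; the package presumably
# registers `main` as a console script, so this guard is an addition for
# standalone use.
if __name__ == '__main__':
    main()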
#!/usr/bin/env python
# encoding: utf-8
import numpy
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision.utils as vutils
import bob.core
logger = bob.core.log.setup("bob.learn.pytorch")
class ConditionalGANTrainer(object):
    """
    Class to train a Conditional GAN

    **Parameters**

    netG: pytorch nn.Module
        The generator network

    netD: pytorch nn.Module
        The discriminator network

    image_size: list
        The size of the images in this format: [channels, height, width]

    batch_size: int
        The size of your minibatch

    noise_dim: int
        The dimension of the noise (input to the generator)

    conditional_dim: int
        The dimension of the conditioning variable

    use_gpu: boolean
        If you would like to use the GPU

    verbosity_level: int
        The level of verbosity output to stdout
    """
    def __init__(self, netG, netD, image_size, batch_size=64, noise_dim=100, conditional_dim=13, use_gpu=False, verbosity_level=2):
        bob.core.log.set_verbosity_level(logger, verbosity_level)

        self.netG = netG
        self.netD = netD
        self.image_size = image_size
        self.batch_size = batch_size
        self.noise_dim = noise_dim
        self.conditional_dim = conditional_dim
        self.use_gpu = use_gpu

        # fixed conditional noise - used to generate samples (one for each value of the conditional variable)
        self.fixed_noise = torch.FloatTensor(self.conditional_dim, noise_dim, 1, 1).normal_(0, 1)
        self.fixed_one_hot = torch.FloatTensor(self.conditional_dim, self.conditional_dim, 1, 1).zero_()
        for k in range(self.conditional_dim):
            self.fixed_one_hot[k, k] = 1

        # TODO: figuring out the CPU/GPU thing - Guillaume HEUSCH, 17-11-2017
        self.fixed_noise = Variable(self.fixed_noise)
        self.fixed_one_hot = Variable(self.fixed_one_hot)

        # binary cross-entropy loss
        self.criterion = nn.BCELoss()

        # move stuff to GPU if needed
        if self.use_gpu:
            self.netD.cuda()
            self.netG.cuda()
            self.criterion.cuda()
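
    # Note (equivalent sketch): the fixed one-hot block above could also be built
    # without the loop, since `torch.eye` yields exactly one 1 per row:
    #
    #     self.fixed_one_hot = torch.eye(self.conditional_dim).view(
    #         self.conditional_dim, self.conditional_dim, 1, 1)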
    def train(self, dataloader, n_epochs=10, learning_rate=0.0002, beta1=0.5, output_dir='out'):
        """
        Function that performs the training.

        **Parameters**

        dataloader: pytorch DataLoader
            The dataloader for your data

        n_epochs: int
            The number of epochs you would like to train for

        learning_rate: float
            The learning rate for the Adam optimizer

        beta1: float
            The beta1 for the Adam optimizer

        output_dir: path
            The directory where you would like to output images and models
        """
        real_label = 1
        fake_label = 0

        # setup optimizers
        optimizerD = optim.Adam(self.netD.parameters(), lr=learning_rate, betas=(beta1, 0.999))
        optimizerG = optim.Adam(self.netG.parameters(), lr=learning_rate, betas=(beta1, 0.999))

        for epoch in range(n_epochs):
            for i, data in enumerate(dataloader, 0):

                start = time.time()

                # get the data and pose labels
                real_images = data['image']
                poses = data['pose']

                # WARNING: the last batch could be smaller than the provided size
                batch_size = len(real_images)

                # create the Tensors with the right batch size
                noise = torch.FloatTensor(batch_size, self.noise_dim, 1, 1).normal_(0, 1)
                label = torch.FloatTensor(batch_size)

                # create the one-hot conditional vector (generator) and feature maps (discriminator)
                one_hot_feature_maps = torch.FloatTensor(batch_size, self.conditional_dim, self.image_size[1], self.image_size[2]).zero_()
                one_hot_vector = torch.FloatTensor(batch_size, self.conditional_dim, 1, 1).zero_()
                for k in range(batch_size):
                    one_hot_feature_maps[k, poses[k], :, :] = 1
                    one_hot_vector[k, poses[k]] = 1
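
                # Vectorized alternative (sketch), assuming `poses` is a
                # LongTensor of shape (batch_size,):
                #
                #     one_hot_vector.scatter_(1, poses.view(-1, 1, 1, 1), 1)
                #     one_hot_feature_maps = one_hot_vector.expand(
                #         batch_size, self.conditional_dim,
                #         self.image_size[1], self.image_size[2]).contiguous()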
                # move stuff to GPU if needed
                if self.use_gpu:
                    real_images = real_images.cuda()
                    label = label.cuda()
                    noise = noise.cuda()
                    one_hot_feature_maps = one_hot_feature_maps.cuda()
                    one_hot_vector = one_hot_vector.cuda()

                # =============
                # DISCRIMINATOR
                # =============
                self.netD.zero_grad()

                # === REAL DATA ===
                label.resize_(batch_size).fill_(real_label)
                imagev = Variable(real_images)
                one_hot_fmv = Variable(one_hot_feature_maps)
                labelv = Variable(label)
                output_real = self.netD(imagev, one_hot_fmv)
                errD_real = self.criterion(output_real, labelv)
                errD_real.backward()

                # === FAKE DATA ===
                noisev = Variable(noise)
                one_hot_vv = Variable(one_hot_vector)
                fake = self.netG(noisev, one_hot_vv)
                labelv = Variable(label.fill_(fake_label))
                output_fake = self.netD(fake, one_hot_fmv)
                errD_fake = self.criterion(output_fake, labelv)
                # `fake` is not detached, so the graph must be retained for the generator step
                errD_fake.backward(retain_graph=True)

                # perform optimization (i.e. update discriminator parameters)
                errD = errD_real + errD_fake
                optimizerD.step()

                # =========
                # GENERATOR
                # =========
                self.netG.zero_grad()
                labelv = Variable(label.fill_(real_label))  # fake labels are real for generator cost
                output_generated = self.netD(fake, one_hot_fmv)
                errG = self.criterion(output_generated, labelv)
                errG.backward()
                optimizerG.step()

                end = time.time()
                logger.info("[{}/{}][{}/{}] => Loss D = {} -- Loss G = {} (time spent: {})".format(epoch, n_epochs, i, len(dataloader), errD.data[0], errG.data[0], (end - start)))

            # save generated images at every epoch
            # TODO: model moved to CPU and back and I don't really know why (expected CPU tensor error)
            # To summarize:
            #   - tried to move tensors/variables to the GPU -> does not work
            #   - left the tensors on the CPU -> does not work
            #   => the model has to be brought back to the CPU :/
            self.netG = self.netG.cpu()
            fake_examples = self.netG(self.fixed_noise, self.fixed_one_hot)
            if self.use_gpu:
                self.netG = self.netG.cuda()
            vutils.save_image(fake_examples.data, '%s/fake_samples_epoch_%03d.png' % (output_dir, epoch), normalize=True)

            # do checkpointing
            torch.save(self.netG.state_dict(), '%s/netG_epoch_%d.pth' % (output_dir, epoch))
            torch.save(self.netD.state_dict(), '%s/netD_epoch_%d.pth' % (output_dir, epoch))
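
# Minimal usage sketch (assumption: a DataLoader yielding dicts with 'image' and
# 'pose' keys, as consumed by `train` above; dimensions are illustrative):
#
#     from bob.learn.pytorch.architectures import weights_init
#     from bob.learn.pytorch.architectures import ConditionalGAN_generator
#     from bob.learn.pytorch.architectures import ConditionalGAN_discriminator
#
#     netG = ConditionalGAN_generator(noise_dim=100, conditional_dim=13)
#     netG.apply(weights_init)
#     netD = ConditionalGAN_discriminator(conditional_dim=13)
#     netD.apply(weights_init)
#     trainer = ConditionalGANTrainer(netG, netD, [3, 64, 64], batch_size=64,
#                                     noise_dim=100, conditional_dim=13)
#     trainer.train(dataloader, n_epochs=25, output_dir='out')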