In [ ]:
import os
os.environ['THEANO_FLAGS']='floatX=float32,device=cuda,optimizer=fast_run,dnn.library_path=/usr/lib'
channel_first = True
channel_axis=1
In [ ]:
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import DropoutLayer, ReshapeLayer, InputLayer
floatX = theano.config.floatX
from lasagne.layers import Conv2DLayer, TransposedConv2DLayer, ConcatLayer, NonlinearityLayer
from lasagne.layers import batch_norm
from lasagne.nonlinearities import LeakyRectify, sigmoid, rectify, tanh
In [ ]:
conv_init = lasagne.init.Normal(0.02, 0)
gamma_init = lasagne.init.Normal(0.02, 1)
In [ ]:
def BASIC_D(nc_in, nc_out, ndf, max_layers=3, use_sigmoid=True):
l = -1
def conv2d(x, nf, stride=2, nonlinearity=LeakyRectify(0.2)):
nonlocal l
l+=1
return Conv2DLayer(x, num_filters=nf, filter_size=4, stride=stride,
pad=1, W=conv_init, flip_filters=False,
nonlinearity=nonlinearity,
name="conv2d_{}".format(l)
)
input_a = InputLayer(shape=(None, nc_in, None, None), name="inputA")
input_b = InputLayer(shape=(None, nc_out, None, None), name="inputB")
_ = ConcatLayer([input_a, input_b], name='concat')
_ = conv2d(_, ndf)
for layer in range(1, max_layers):
out_feat = ndf * min(2**layer, 8)
_ = conv2d(_, out_feat)
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
out_feat = ndf*min(2**max_layers, 8)
_ = conv2d(_, out_feat, stride=1)
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
_ = conv2d(_, 1, stride=1, nonlinearity=sigmoid if use_sigmoid else None)
return _
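In [ ]:
# Sanity check (a sketch, not part of the original notebook): with max_layers=3
# this mirrors the 70x70 PatchGAN discriminator from pix2pix, so a 256x256 pair
# should yield a 30x30 grid of per-patch real/fake scores rather than a single
# scalar. The throwaway _check network below is used only for shape inference.
_check = BASIC_D(3, 3, 64, max_layers=3, use_sigmoid=True)
_inputs = [l for l in lasagne.layers.get_all_layers(_check) if isinstance(l, InputLayer)]
print(lasagne.layers.get_output_shape(_check, {l: (1, 3, 256, 256) for l in _inputs}))
# expect (1, 1, 30, 30)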
In [ ]:
# from https://gist.github.com/ajbrock/a3858c26282d9731191901b397b3ce9f
def reflect_pad(x, width, batch_ndim=1):
"""
Pad a tensor with a constant value.
Parameters
----------
x : tensor
width : int, iterable of int, or iterable of tuple
Padding width. If an int, pads each axis symmetrically with the same
amount in the beginning and end. If an iterable of int, defines the
symmetric padding width separately for each axis. If an iterable of
tuples of two ints, defines a seperate padding width for each beginning
and end of each axis.
batch_ndim : integer
Dimensions before the value will not be padded.
"""
# Idea for how to make this happen: Flip the tensor horizontally to grab horizontal values, then vertically to grab vertical values
# alternatively, just slice correctly
input_shape = x.shape
input_ndim = x.ndim
output_shape = list(input_shape)
indices = [slice(None) for _ in output_shape]
if isinstance(width, int):
widths = [width] * (input_ndim - batch_ndim)
else:
widths = width
for k, w in enumerate(widths):
try:
l, r = w
except TypeError:
l = r = w
output_shape[k + batch_ndim] += l + r
indices[k + batch_ndim] = slice(l, l + input_shape[k + batch_ndim])
# Create output array
out = T.zeros(output_shape)
    # Vertical reflections: mirror the top and bottom `width` rows of x
    out = T.set_subtensor(out[:, :, :width, width:-width], x[:, :, width:0:-1, :])
    out = T.set_subtensor(out[:, :, -width:, width:-width], x[:, :, -2:-(2+width):-1, :])
    # Place x in the centre of out
    # (equivalently: out = T.set_subtensor(out[tuple(indices)], x))
    out = T.set_subtensor(out[:, :, width:-width, width:-width], x)
    # Horizontal reflections: mirror the left and right `width` columns of the
    # already padded tensor, so the corners get reflected as well
    out = T.set_subtensor(out[:, :, :, :width], out[:, :, :, (2*width):width:-1])
    out = T.set_subtensor(out[:, :, :, -width:], out[:, :, :, -(width+2):-(2*width+2):-1])
return out
class ReflectLayer(lasagne.layers.Layer):
def __init__(self, incoming, width, batch_ndim=2, **kwargs):
super(ReflectLayer, self).__init__(incoming, **kwargs)
self.width = width
self.batch_ndim = batch_ndim
def get_output_shape_for(self, input_shape):
output_shape = list(input_shape)
if isinstance(self.width, int):
widths = [self.width] * (len(input_shape) - self.batch_ndim)
else:
widths = self.width
for k, w in enumerate(widths):
if output_shape[k + self.batch_ndim] is None:
continue
else:
try:
l, r = w
except TypeError:
l = r = w
output_shape[k + self.batch_ndim] += l + r
return tuple(output_shape)
def get_output_for(self, input, **kwargs):
return reflect_pad(input, self.width, self.batch_ndim)
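In [ ]:
# Illustration (not in the original notebook): reflection padding mirrors the
# border pixels instead of zero-padding, which avoids edge artifacts around the
# generator's convolutions. With batch_ndim=2 (as ReflectLayer uses) only the
# two spatial axes are padded, and a width-1 pad should agree with numpy's
# 'reflect' mode.
_x = T.tensor4()
_reflect1 = theano.function([_x], reflect_pad(_x, 1, batch_ndim=2))
_img = np.arange(16, dtype=floatX).reshape(1, 1, 4, 4)
print(_reflect1(_img)[0, 0])
print(np.pad(_img[0, 0], 1, mode='reflect'))   # the two 6x6 arrays should match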
In [ ]:
def UNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True):
max_nf = 8*ngf
def block(x, s, nf_in, use_batchnorm=True, nf_out=None, nf_next=None):
# print("block",x,s,nf_in, use_batchnorm, nf_out, nf_next)
assert s>=2 and s%2==0
if nf_next is None:
nf_next = min(nf_in*2, max_nf)
if nf_out is None:
nf_out = nf_in
x = Conv2DLayer(x, num_filters=nf_next, filter_size=4, stride=2, pad=1, W=conv_init, flip_filters=False,
nonlinearity=None, name='conv2d_{}'.format(s))
if s>2:
if use_batchnorm:
x = batch_norm(x, epsilon=1e-5, gamma=gamma_init)
x2 = NonlinearityLayer(x, nonlinearity=LeakyRectify(0.2), name="leakyRelu_{}".format(s))
x2 = block(x2, s//2, nf_next)
x = ConcatLayer([x, x2], name="concat_{}".format(s))
x = NonlinearityLayer(x, nonlinearity=rectify, name="Relu_{}".format(s))
x = TransposedConv2DLayer(x, num_filters=nf_out, filter_size=4, stride=2, crop=1, W=conv_init,
flip_filters=True, nonlinearity=None, name="convt_{}".format(s))
if use_batchnorm:
x = batch_norm(x, epsilon=1e-5, gamma=gamma_init)
if s <= 8:
x = DropoutLayer(x, 0.5, name="dropout_{}".format(s))
return x
s = isize if fixed_input_size else None
_ = InputLayer(shape=(None, nc_in, s, s), name='input')
_ = block(_, isize, nc_in, False, nf_out=nc_out, nf_next=ngf)
_ = NonlinearityLayer(_, nonlinearity=tanh, name='tanh')
return _
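In [ ]:
# Optional check (a sketch): the recursion in block() halves s until s == 2, so
# isize must be a power of two. A small 64x64 generator makes the encoder easy
# to inspect: each downsampling conv halves the spatial size down to a 1x1
# bottleneck, and the decoder mirrors it back up through the skip connections.
_g64 = UNET_G(64, nc_in=3, nc_out=3, ngf=64)
for l in lasagne.layers.get_all_layers(_g64):
    if isinstance(l, Conv2DLayer):
        print(l.name, l.output_shape)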
In [ ]:
from lasagne.layers import ElemwiseSumLayer, SliceLayer
def reflect_padding_conv(_, num_filters, filter_size=3, stride=1, nonlinearity=rectify, use_batchnorm=True, **k):
assert filter_size%2==1
pad_size = filter_size>>1
_ = ReflectLayer(_, width=pad_size)
_ = Conv2DLayer(_, num_filters=num_filters, filter_size=filter_size, stride=stride,
pad=0, W=conv_init, flip_filters=False, nonlinearity=nonlinearity, **k)
if use_batchnorm:
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
return _
def res_block(_, num_filters, name):
x = _
_ = reflect_padding_conv(_, num_filters, name=name+"_conv1")
_ = reflect_padding_conv(_, num_filters, nonlinearity=None, name=name+"_conv2")
return ElemwiseSumLayer([x, _], name=name+"_add")
def RESNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True):
s = isize if fixed_input_size else None
_ = InputLayer(shape=(None, nc_in, s, s), name='input')
_ = reflect_padding_conv(_, ngf, 7, name="first")
for m in [2,4]:
_ = Conv2DLayer(_, num_filters=ngf*m, filter_size=4, stride=2,
pad=1, W=conv_init, flip_filters=False,
nonlinearity=rectify, name='conv_{}'.format(ngf*m))
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
for i in range(6):
_ = res_block(_, ngf*4, "res_block{}".format(i))
    for m in [2,1]:
        # each transposed conv upsamples n -> 2*n + 1 pixels, so slice off the
        # extra row and column here so that two rounds restore the input resolution
        _ = TransposedConv2DLayer(_, num_filters=ngf*m, filter_size=3, stride=2,
                                  crop=0, W=conv_init, flip_filters=True,
                                  nonlinearity=rectify, name="convt_{}".format(ngf*m))
        _ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
        _ = SliceLayer(_, slice(0, -1), 2)
        _ = SliceLayer(_, slice(0, -1), 3)
_ = reflect_padding_conv(_, nc_out, 7, nonlinearity=tanh, use_batchnorm=False, name="output")
return _
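In [ ]:
# Size bookkeeping for RESNET_G (a sketch; these are the standard Lasagne output
# size formulas): the two stride-2 convolutions shrink 256 to 64, the residual
# blocks keep 64, and each transposed convolution produces 2n+1 pixels that the
# SliceLayer trims back to 2n, so the output matches the 256x256 input exactly.
def conv_out(n, f=4, s=2, p=1):    # Conv2DLayer: (n + 2p - f)//s + 1
    return (n + 2*p - f)//s + 1
def convt_out(n, f=3, s=2, c=0):   # TransposedConv2DLayer: (n - 1)*s - 2c + f
    return (n - 1)*s - 2*c + f
n = 256
for _i in range(2):
    n = conv_out(n)
print(n)                           # 64 after the encoder
for _i in range(2):
    n = convt_out(n) - 1           # -1 for the SliceLayer
print(n)                           # 256 after the decoder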
In [ ]:
nc_in = 3
nc_out = 3
ngf = 64
ndf = 64
use_lsgan = True
λ = 10 if use_lsgan else 100
loadSize = 286
imageSize = 256
batchSize = 1
lrD = 2e-4
lrG = 2e-4
In [ ]:
netD = BASIC_D(nc_in, nc_out, ndf, use_sigmoid = not use_lsgan)
for l in lasagne.layers.get_all_layers(netD):
print(l.name, l.output_shape)
In [ ]:
#netG = UNET_G(imageSize, nc_in, nc_out, ngf)
netG = RESNET_G(imageSize, nc_in, nc_out, ngf)
for l in lasagne.layers.get_all_layers(netG):
print(l.name, l.output_shape)
In [ ]:
from lasagne.layers import get_output, get_all_layers,get_all_params
# use the current minibatch statistics in every batch-norm layer, both when
# training and when generating, instead of accumulating running averages
no_bn_avg = dict( batch_norm_update_averages=False,
                  batch_norm_use_averages=False)
real_A = get_all_layers(netG)[0].input_var
fake_B = get_output(netG, **no_bn_avg)
netG_generate = theano.function([real_A], fake_B)
netD_l1, netD_l2 = get_all_layers(netD)[:2]
real_B = netD_l2.input_var
output_D_real = get_output(netD, inputs={netD_l1: real_A, netD_l2: real_B}, **no_bn_avg)
output_D_fake = get_output(netD, inputs={netD_l1: real_A, netD_l2: fake_B}, **no_bn_avg)
if use_lsgan:
loss_fn = lambda output, target : T.mean( (output-target)**2 )
else:
loss_fn = lambda output, target : -T.mean(T.log(output+1e-12)*target+T.log(1-output+1e-12)*(1-target))
loss_D_real = loss_fn(output_D_real, T.ones_like(output_D_real))
loss_D_fake = loss_fn(output_D_fake, T.zeros_like(output_D_fake))
loss_D = loss_D_real + loss_D_fake
loss_G_fake = loss_fn(output_D_fake, T.ones_like(output_D_fake))
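In [ ]:
# Quick numeric illustration (not from the original notebook): the discriminator
# is trained against hard 0/1 targets, so an output of 0.5 everywhere costs 0.25
# per pixel on both branches with the LSGAN loss (and -log(0.5) ~ 0.693 with the
# sigmoid/BCE variant).
_o = T.matrix('o')
_loss_at = theano.function([_o], [loss_fn(_o, T.ones_like(_o)),
                                  loss_fn(_o, T.zeros_like(_o))])
_half = np.full((2, 2), 0.5, dtype=floatX)
print(_loss_at(_half))   # [0.25, 0.25] with use_lsgan=True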
In [ ]:
loss_L1 = T.mean(abs(fake_B-real_B))
loss_G = loss_G_fake + λ * loss_L1
params_netD = get_all_params(netD, trainable=True)
params_netG = get_all_params(netG, trainable=True)
optimize_G = lasagne.updates.adam(loss_G, params_netG, learning_rate=lrG, beta1=0.5)
optimize_D = lasagne.updates.adam(loss_D, params_netD, learning_rate=lrD, beta1=0.5)
netG_train = theano.function([real_A, real_B], [loss_G_fake, loss_L1], updates=optimize_G)
netD_train = theano.function([real_A, real_B], [loss_D/2], updates=optimize_D)
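In [ ]:
# Quick shape check (a sketch): the compiled generator should map a 256x256
# input to a 256x256 output in (-1, 1); this catches size mismatches (e.g. a
# misplaced SliceLayer) before the expensive training loop starts.
_A = np.random.uniform(-1, 1, (1, nc_in, imageSize, imageSize)).astype(floatX)
_fake = netG_generate(_A)
print(_fake.shape, _fake.min(), _fake.max())   # expect (1, 3, 256, 256)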
In [ ]:
from PIL import Image
import numpy as np
import glob
from random import randint, shuffle
def load_data(file_pattern):
return glob.glob(file_pattern)
def read_image(fn, direction=0):
    im = Image.open(fn)
    # each file stores A and B side by side, so resize to (2*loadSize, loadSize),
    # centre-crop an imageSize patch from each half, and randomly mirror the pair
    im = im.resize( (loadSize*2, loadSize), Image.BILINEAR )
arr = np.array(im)/255*2-1
w1,w2 = (loadSize-imageSize)//2,(loadSize+imageSize)//2
h1,h2 = w1,w2
imgA = arr[h1:h2, loadSize+w1:loadSize+w2, :]
imgB = arr[h1:h2, w1:w2, :]
if randint(0,1):
imgA=imgA[:,::-1]
imgB=imgB[:,::-1]
if channel_first:
imgA = np.moveaxis(imgA, 2, 0)
imgB = np.moveaxis(imgB, 2, 0)
if direction==0:
return imgA, imgB
else:
return imgB,imgA
data = "edges2shoes"
#data = "facades"
direction = 1
trainAB = load_data('pix2pix/{}/train/*.jpg'.format(data))
valAB = load_data('pix2pix/{}/val/*.jpg'.format(data))
assert len(trainAB) and len(valAB)
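In [ ]:
# Sanity check on the data pipeline (a sketch): each pair should come back as
# two channel-first (3, 256, 256) arrays scaled to [-1, 1], the range expected
# by the tanh output of the generator.
_a, _b = read_image(trainAB[0], direction)
print(_a.shape, _b.shape, _a.min(), _a.max())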
In [ ]:
def minibatch(dataAB, batchsize, direction=0):
length = len(dataAB)
epoch = i = 0
tmpsize = None
while True:
size = tmpsize if tmpsize else batchsize
if i+size > length:
shuffle(dataAB)
i = 0
epoch+=1
dataA = []
dataB = []
for j in range(i,i+size):
imgA,imgB = read_image(dataAB[j], direction)
dataA.append(imgA)
dataB.append(imgB)
dataA = np.float32(dataA)
dataB = np.float32(dataB)
i+=size
tmpsize = yield epoch, dataA, dataB
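In [ ]:
# Usage note (illustration only): next() yields (epoch, A, B) with the regular
# batch size, while .send(n) overrides the batch size for a single draw -- this
# is how the training loop below pulls 6 training pairs for display via
# train_batch.send(6).
_demo = minibatch(trainAB, 2, direction=direction)
print(next(_demo)[1].shape)     # (2, 3, 256, 256)
print(_demo.send(4)[1].shape)   # (4, 3, 256, 256)
del _demo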
In [ ]:
from IPython.display import display
def showX(X, rows=1):
assert X.shape[0]%rows == 0
int_X = ( (X+1)/2*255).clip(0,255).astype('uint8')
if channel_first:
int_X = np.moveaxis(int_X.reshape(-1,3,imageSize,imageSize), 1, 3)
else:
int_X = int_X.reshape(-1,imageSize,imageSize, 3)
int_X = int_X.reshape(rows, -1, imageSize, imageSize,3).swapaxes(1,2).reshape(rows*imageSize,-1, 3)
display(Image.fromarray(int_X))
In [ ]:
train_batch = minibatch(trainAB, 12, direction=direction)
_, trainA, trainB = next(train_batch)
showX(trainA, 2)
showX(trainB, 2)
del train_batch, trainA, trainB
In [ ]:
def netG_gen(A):
    # generate one image at a time: batch norm runs on the current batch statistics
    # (see no_bn_avg above), so per-sample calls keep each output independent
    return np.concatenate([netG_generate(A[i:i+1]) for i in range(A.shape[0])], axis=0)
In [ ]:
import time
from IPython.display import clear_output
t0 = time.time()
niter = 150
gen_iterations = 0
errL1 = epoch = errG = 0
errL1_sum = errG_sum = errD_sum = 0
display_iters = 500
val_batch = minibatch(valAB, 6, direction)
train_batch = minibatch(trainAB, batchSize, direction)
while epoch < niter:
epoch, trainA, trainB = next(train_batch)
errD, = netD_train(trainA, trainB)
errD_sum +=errD
# epoch, trainA, trainB = next(train_batch)
errG, errL1 = netG_train(trainA, trainB)
errG_sum += errG
errL1_sum += errL1
gen_iterations+=1
if gen_iterations%display_iters==0:
if gen_iterations%(5*display_iters)==0:
clear_output()
print('[%d/%d][%d] Loss_D: %f Loss_G: %f loss_L1: %f'
% (epoch, niter, gen_iterations, errD_sum/display_iters, errG_sum/display_iters, errL1_sum/display_iters), time.time()-t0)
_, valA, valB = train_batch.send(6)
fakeB = netG_gen(valA)
showX(np.concatenate([valA, valB, fakeB], axis=0), 3)
errL1_sum = errG_sum = errD_sum = 0
_, valA, valB = next(val_batch)
fakeB = netG_gen(valA)
showX(np.concatenate([valA, valB, fakeB], axis=0), 3)
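In [ ]:
# Checkpointing (a sketch, not part of the original notebook; the filename is
# arbitrary): Lasagne keeps all weights in the layer graph, so np.savez together
# with get_all_param_values / set_all_param_values is enough to save and restore
# the trained generator.
np.savez('netG_weights.npz', *lasagne.layers.get_all_param_values(netG))
# to restore later:
# with np.load('netG_weights.npz') as f:
#     lasagne.layers.set_all_param_values(netG, [f['arr_%d' % i] for i in range(len(f.files))])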
In [ ]: