In [ ]:
import os
os.environ['THEANO_FLAGS']='floatX=float32,device=cuda,optimizer=fast_run,dnn.library_path=/usr/lib'
channel_first = True
channel_axis=1
In [ ]:
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import DropoutLayer, ReshapeLayer, InputLayer
floatX = theano.config.floatX
from lasagne.layers import Conv2DLayer, TransposedConv2DLayer, ConcatLayer, NonlinearityLayer
from lasagne.layers import batch_norm
from lasagne.nonlinearities import LeakyRectify, sigmoid, rectify, tanh
In [ ]:
conv_init = lasagne.init.Normal(0.02, 0)
gamma_init = lasagne.init.Normal(0.02, 1)
In [ ]:
def BASIC_D(nc_in, nc_out, ndf, max_layers=3, use_sigmoid=True):
l = -1
def conv2d(x, nf, stride=2, nonlinearity=LeakyRectify(0.2)):
nonlocal l
l+=1
return Conv2DLayer(x, num_filters=nf, filter_size=4, stride=stride,
pad=1, W=conv_init, flip_filters=False,
nonlinearity=nonlinearity,
name="conv2d_{}".format(l)
)
input_a = InputLayer(shape=(None, nc_in, None, None), name="inputA")
input_b = InputLayer(shape=(None, nc_out, None, None), name="inputB")
_ = ConcatLayer([input_a, input_b], name='concat')
_ = conv2d(_, ndf)
for layer in range(1, max_layers):
out_feat = ndf * min(2**layer, 8)
_ = conv2d(_, out_feat)
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
out_feat = ndf*min(2**max_layers, 8)
_ = conv2d(_, out_feat, stride=1)
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
_ = conv2d(_, 1, stride=1, nonlinearity=sigmoid if use_sigmoid else None)
return _
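In [ ]:
# Sanity check (a sketch, not part of the original notebook): with max_layers=3
# this mirrors the 70x70 PatchGAN discriminator from pix2pix, so a 256x256 pair
# should yield a 30x30 grid of per-patch real/fake scores rather than a single
# scalar. The throwaway _check network below is used only for shape inference.
_check = BASIC_D(3, 3, 64, max_layers=3, use_sigmoid=True)
_inputs = [l for l in lasagne.layers.get_all_layers(_check) if isinstance(l, InputLayer)]
print(lasagne.layers.get_output_shape(_check, {l: (1, 3, 256, 256) for l in _inputs}))
# expect (1, 1, 30, 30)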
In [ ]:
# from https://gist.github.com/ajbrock/a3858c26282d9731191901b397b3ce9f
def reflect_pad(x, width, batch_ndim=1):
"""
Pad a tensor with a constant value.
Parameters
----------
x : tensor
width : int, iterable of int, or iterable of tuple
Padding width. If an int, pads each axis symmetrically with the same
amount in the beginning and end. If an iterable of int, defines the
symmetric padding width separately for each axis. If an iterable of
tuples of two ints, defines a seperate padding width for each beginning
and end of each axis.
batch_ndim : integer
Dimensions before the value will not be padded.
"""
# Idea for how to make this happen: Flip the tensor horizontally to grab horizontal values, then vertically to grab vertical values
# alternatively, just slice correctly
input_shape = x.shape
input_ndim = x.ndim
output_shape = list(input_shape)
indices = [slice(None) for _ in output_shape]
if isinstance(width, int):
widths = [width] * (input_ndim - batch_ndim)
else:
widths = width
for k, w in enumerate(widths):
try:
l, r = w
except TypeError:
l = r = w
output_shape[k + batch_ndim] += l + r
indices[k + batch_ndim] = slice(l, l + input_shape[k + batch_ndim])
# Create output array
out = T.zeros(output_shape)
    # Vertical reflections: mirror the top and bottom `width` rows of x
    out = T.set_subtensor(out[:, :, :width, width:-width], x[:, :, width:0:-1, :])
    out = T.set_subtensor(out[:, :, -width:, width:-width], x[:, :, -2:-(2+width):-1, :])
    # Place x in the centre of out
    # (equivalently: out = T.set_subtensor(out[tuple(indices)], x))
    out = T.set_subtensor(out[:, :, width:-width, width:-width], x)
    # Horizontal reflections: mirror the left and right `width` columns of the
    # already padded tensor, so the corners get reflected as well
    out = T.set_subtensor(out[:, :, :, :width], out[:, :, :, (2*width):width:-1])
    out = T.set_subtensor(out[:, :, :, -width:], out[:, :, :, -(width+2):-(2*width+2):-1])
return out
class ReflectLayer(lasagne.layers.Layer):
def __init__(self, incoming, width, batch_ndim=2, **kwargs):
super(ReflectLayer, self).__init__(incoming, **kwargs)
self.width = width
self.batch_ndim = batch_ndim
def get_output_shape_for(self, input_shape):
output_shape = list(input_shape)
if isinstance(self.width, int):
widths = [self.width] * (len(input_shape) - self.batch_ndim)
else:
widths = self.width
for k, w in enumerate(widths):
if output_shape[k + self.batch_ndim] is None:
continue
else:
try:
l, r = w
except TypeError:
l = r = w
output_shape[k + self.batch_ndim] += l + r
return tuple(output_shape)
def get_output_for(self, input, **kwargs):
return reflect_pad(input, self.width, self.batch_ndim)
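In [ ]:
# Illustration (not in the original notebook): reflection padding mirrors the
# border pixels instead of zero-padding, which avoids edge artifacts around the
# generator's convolutions. With batch_ndim=2 (as ReflectLayer uses) only the
# two spatial axes are padded, and a width-1 pad should agree with numpy's
# 'reflect' mode.
_x = T.tensor4()
_reflect1 = theano.function([_x], reflect_pad(_x, 1, batch_ndim=2))
_img = np.arange(16, dtype=floatX).reshape(1, 1, 4, 4)
print(_reflect1(_img)[0, 0])
print(np.pad(_img[0, 0], 1, mode='reflect'))   # the two 6x6 arrays should match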
In [ ]:
def UNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True):
max_nf = 8*ngf
def block(x, s, nf_in, use_batchnorm=True, nf_out=None, nf_next=None):
# print("block",x,s,nf_in, use_batchnorm, nf_out, nf_next)
assert s>=2 and s%2==0
if nf_next is None:
nf_next = min(nf_in*2, max_nf)
if nf_out is None:
nf_out = nf_in
x = Conv2DLayer(x, num_filters=nf_next, filter_size=4, stride=2, pad=1, W=conv_init, flip_filters=False,
nonlinearity=None, name='conv2d_{}'.format(s))
if s>2:
if use_batchnorm:
x = batch_norm(x, epsilon=1e-5, gamma=gamma_init)
x2 = NonlinearityLayer(x, nonlinearity=LeakyRectify(0.2), name="leakyRelu_{}".format(s))
x2 = block(x2, s//2, nf_next)
x = ConcatLayer([x, x2], name="concat_{}".format(s))
x = NonlinearityLayer(x, nonlinearity=rectify, name="Relu_{}".format(s))
x = TransposedConv2DLayer(x, num_filters=nf_out, filter_size=4, stride=2, crop=1, W=conv_init,
flip_filters=True, nonlinearity=None, name="convt_{}".format(s))
if use_batchnorm:
x = batch_norm(x, epsilon=1e-5, gamma=gamma_init)
if s <= 8:
x = DropoutLayer(x, 0.5, name="dropout_{}".format(s))
return x
s = isize if fixed_input_size else None
_ = InputLayer(shape=(None, nc_in, s, s), name='input')
_ = block(_, isize, nc_in, False, nf_out=nc_out, nf_next=ngf)
_ = NonlinearityLayer(_, nonlinearity=tanh, name='tanh')
return _
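In [ ]:
# Optional check (a sketch): the recursion in block() halves s until s == 2, so
# isize must be a power of two. A small 64x64 generator makes the encoder easy
# to inspect: each downsampling conv halves the spatial size down to a 1x1
# bottleneck, and the decoder mirrors it back up through the skip connections.
_g64 = UNET_G(64, nc_in=3, nc_out=3, ngf=64)
for l in lasagne.layers.get_all_layers(_g64):
    if isinstance(l, Conv2DLayer):
        print(l.name, l.output_shape)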
In [ ]:
from lasagne.layers import ElemwiseSumLayer, SliceLayer
def reflect_padding_conv(_, num_filters, filter_size=3, stride=1, nonlinearity=rectify, use_batchnorm=True, **k):
assert filter_size%2==1
pad_size = filter_size>>1
_ = ReflectLayer(_, width=pad_size)
_ = Conv2DLayer(_, num_filters=num_filters, filter_size=filter_size, stride=stride,
pad=0, W=conv_init, flip_filters=False, nonlinearity=nonlinearity, **k)
if use_batchnorm:
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
return _
def res_block(_, num_filters, name):
x = _
_ = reflect_padding_conv(_, num_filters, name=name+"_conv1")
_ = reflect_padding_conv(_, num_filters, nonlinearity=None, name=name+"_conv2")
return ElemwiseSumLayer([x, _], name=name+"_add")
def RESNET_G(isize, nc_in=3, nc_out=3, ngf=64, fixed_input_size=True):
s = isize if fixed_input_size else None
_ = InputLayer(shape=(None, nc_in, s, s), name='input')
_ = reflect_padding_conv(_, ngf, 7, name="first")
for m in [2,4]:
_ = Conv2DLayer(_, num_filters=ngf*m, filter_size=4, stride=2,
pad=1, W=conv_init, flip_filters=False,
nonlinearity=rectify, name='conv_{}'.format(ngf*m))
_ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
for i in range(6):
_ = res_block(_, ngf*4, "res_block{}".format(i))
    for m in [2,1]:
        # each transposed conv upsamples n -> 2*n + 1 pixels, so slice off the
        # extra row and column here so that two rounds restore the input resolution
        _ = TransposedConv2DLayer(_, num_filters=ngf*m, filter_size=3, stride=2,
                                  crop=0, W=conv_init, flip_filters=True,
                                  nonlinearity=rectify, name="convt_{}".format(ngf*m))
        _ = batch_norm(_, epsilon=1e-5, gamma=gamma_init)
        _ = SliceLayer(_, slice(0, -1), 2)
        _ = SliceLayer(_, slice(0, -1), 3)
_ = reflect_padding_conv(_, nc_out, 7, nonlinearity=tanh, use_batchnorm=False, name="output")
return _
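In [ ]:
# Size bookkeeping for RESNET_G (a sketch; these are the standard Lasagne output
# size formulas): the two stride-2 convolutions shrink 256 to 64, the residual
# blocks keep 64, and each transposed convolution produces 2n+1 pixels that the
# SliceLayer trims back to 2n, so the output matches the 256x256 input exactly.
def conv_out(n, f=4, s=2, p=1):    # Conv2DLayer: (n + 2p - f)//s + 1
    return (n + 2*p - f)//s + 1
def convt_out(n, f=3, s=2, c=0):   # TransposedConv2DLayer: (n - 1)*s - 2c + f
    return (n - 1)*s - 2*c + f
n = 256
for _i in range(2):
    n = conv_out(n)
print(n)                           # 64 after the encoder
for _i in range(2):
    n = convt_out(n) - 1           # -1 for the SliceLayer
print(n)                           # 256 after the decoder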
In [ ]:
nc_in = 3
nc_out = 3
ngf = 64
ndf = 64
use_lsgan = True
λ = 10 if use_lsgan else 100
loadSize = 286
imageSize = 256
batchSize = 1
lrD = 2e-4
lrG = 2e-4
In [ ]:
netD = BASIC_D(nc_in, nc_out, ndf, use_sigmoid = not use_lsgan)
for l in lasagne.layers.get_all_layers(netD):
print(l.name, l.output_shape)
In [ ]:
#netG = UNET_G(imageSize, nc_in, nc_out, ngf)
netG = RESNET_G(imageSize, nc_in, nc_out, ngf)
for l in lasagne.layers.get_all_layers(netG):
print(l.name, l.output_shape)
In [ ]:
from lasagne.layers import get_output, get_all_layers,get_all_params
# use the current minibatch statistics in every batch-norm layer, both when
# training and when generating, instead of accumulating running averages
no_bn_avg = dict( batch_norm_update_averages=False,
                  batch_norm_use_averages=False)
real_A = get_all_layers(netG)[0].input_var
fake_B = get_output(netG, **no_bn_avg)
netG_generate = theano.function([real_A], fake_B)
netD_l1, netD_l2 = get_all_layers(netD)[:2]
real_B = netD_l2.input_var
output_D_real = get_output(netD, inputs={netD_l1: real_A, netD_l2: real_B}, **no_bn_avg)
output_D_fake = get_output(netD, inputs={netD_l1: real_A, netD_l2: fake_B}, **no_bn_avg)
if use_lsgan:
loss_fn = lambda output, target : T.mean( (output-target)**2 )
else:
loss_fn = lambda output, target : -T.mean(T.log(output+1e-12)*target+T.log(1-output+1e-12)*(1-target))
loss_D_real = loss_fn(output_D_real, T.ones_like(output_D_real))
loss_D_fake = loss_fn(output_D_fake, T.zeros_like(output_D_fake))
loss_D = loss_D_real + loss_D_fake
loss_G_fake = loss_fn(output_D_fake, T.ones_like(output_D_fake))
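In [ ]:
# Quick numeric illustration (not from the original notebook): the discriminator
# is trained against hard 0/1 targets, so an output of 0.5 everywhere costs 0.25
# per pixel on both branches with the LSGAN loss (and -log(0.5) ~ 0.693 with the
# sigmoid/BCE variant).
_o = T.matrix('o')
_loss_at = theano.function([_o], [loss_fn(_o, T.ones_like(_o)),
                                  loss_fn(_o, T.zeros_like(_o))])
_half = np.full((2, 2), 0.5, dtype=floatX)
print(_loss_at(_half))   # [0.25, 0.25] with use_lsgan=True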
In [ ]:
loss_L1 = T.mean(abs(fake_B-real_B))
loss_G = loss_G_fake + λ * loss_L1
params_netD = get_all_params(netD, trainable=True)
params_netG = get_all_params(netG, trainable=True)
optimize_G = lasagne.updates.adam(loss_G, params_netG, learning_rate=lrG, beta1=0.5)
optimize_D = lasagne.updates.adam(loss_D, params_netD, learning_rate=lrD, beta1=0.5)
netG_train = theano.function([real_A, real_B], [loss_G_fake, loss_L1], updates=optimize_G)
netD_train = theano.function([real_A, real_B], [loss_D/2], updates=optimize_D)
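In [ ]:
# Quick shape check (a sketch): the compiled generator should map a 256x256
# input to a 256x256 output in (-1, 1); this catches size mismatches (e.g. a
# misplaced SliceLayer) before the expensive training loop starts.
_A = np.random.uniform(-1, 1, (1, nc_in, imageSize, imageSize)).astype(floatX)
_fake = netG_generate(_A)
print(_fake.shape, _fake.min(), _fake.max())   # expect (1, 3, 256, 256)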
In [ ]:
from PIL import Image
import numpy as np
import glob
from random import randint, shuffle
def load_data(file_pattern):
return glob.glob(file_pattern)
def read_image(fn, direction=0):
    im = Image.open(fn)
    # each file stores A and B side by side, so resize to (2*loadSize, loadSize),
    # centre-crop an imageSize patch from each half, and randomly mirror the pair
    im = im.resize( (loadSize*2, loadSize), Image.BILINEAR )
arr = np.array(im)/255*2-1
w1,w2 = (loadSize-imageSize)//2,(loadSize+imageSize)//2
h1,h2 = w1,w2
imgA = arr[h1:h2, loadSize+w1:loadSize+w2, :]
imgB = arr[h1:h2, w1:w2, :]
if randint(0,1):
imgA=imgA[:,::-1]
imgB=imgB[:,::-1]
if channel_first:
imgA = np.moveaxis(imgA, 2, 0)
imgB = np.moveaxis(imgB, 2, 0)
if direction==0:
return imgA, imgB
else:
return imgB,imgA
data = "edges2shoes"
#data = "facades"
direction = 1
trainAB = load_data('pix2pix/{}/train/*.jpg'.format(data))
valAB = load_data('pix2pix/{}/val/*.jpg'.format(data))
assert len(trainAB) and len(valAB)
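In [ ]:
# Sanity check on the data pipeline (a sketch): each pair should come back as
# two channel-first (3, 256, 256) arrays scaled to [-1, 1], the range expected
# by the tanh output of the generator.
_a, _b = read_image(trainAB[0], direction)
print(_a.shape, _b.shape, _a.min(), _a.max())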
In [ ]:
def minibatch(dataAB, batchsize, direction=0):
length = len(dataAB)
epoch = i = 0
tmpsize = None
while True:
size = tmpsize if tmpsize else batchsize
if i+size > length:
shuffle(dataAB)
i = 0
epoch+=1
dataA = []
dataB = []
for j in range(i,i+size):
imgA,imgB = read_image(dataAB[j], direction)
dataA.append(imgA)
dataB.append(imgB)
dataA = np.float32(dataA)
dataB = np.float32(dataB)
i+=size
tmpsize = yield epoch, dataA, dataB
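In [ ]:
# Usage note (illustration only): next() yields (epoch, A, B) with the regular
# batch size, while .send(n) overrides the batch size for a single draw -- this
# is how the training loop below pulls 6 training pairs for display via
# train_batch.send(6).
_demo = minibatch(trainAB, 2, direction=direction)
print(next(_demo)[1].shape)     # (2, 3, 256, 256)
print(_demo.send(4)[1].shape)   # (4, 3, 256, 256)
del _demo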
In [ ]:
from IPython.display import display
def showX(X, rows=1):
assert X.shape[0]%rows == 0
int_X = ( (X+1)/2*255).clip(0,255).astype('uint8')
if channel_first:
int_X = np.moveaxis(int_X.reshape(-1,3,imageSize,imageSize), 1, 3)
else:
int_X = int_X.reshape(-1,imageSize,imageSize, 3)
int_X = int_X.reshape(rows, -1, imageSize, imageSize,3).swapaxes(1,2).reshape(rows*imageSize,-1, 3)
display(Image.fromarray(int_X))
In [ ]:
train_batch = minibatch(trainAB, 12, direction=direction)
_, trainA, trainB = next(train_batch)
showX(trainA, 2)
showX(trainB, 2)
del train_batch, trainA, trainB
In [ ]:
def netG_gen(A):
    # generate one image at a time: batch norm runs on the current batch statistics
    # (see no_bn_avg above), so per-sample calls keep each output independent
    return np.concatenate([netG_generate(A[i:i+1]) for i in range(A.shape[0])], axis=0)
In [ ]:
import time
from IPython.display import clear_output
t0 = time.time()
niter = 150
gen_iterations = 0
errL1 = epoch = errG = 0
errL1_sum = errG_sum = errD_sum = 0
display_iters = 500
val_batch = minibatch(valAB, 6, direction)
train_batch = minibatch(trainAB, batchSize, direction)
while epoch < niter:
epoch, trainA, trainB = next(train_batch)
errD, = netD_train(trainA, trainB)
errD_sum +=errD
# epoch, trainA, trainB = next(train_batch)
errG, errL1 = netG_train(trainA, trainB)
errG_sum += errG
errL1_sum += errL1
gen_iterations+=1
if gen_iterations%display_iters==0:
if gen_iterations%(5*display_iters)==0:
clear_output()
print('[%d/%d][%d] Loss_D: %f Loss_G: %f loss_L1: %f'
% (epoch, niter, gen_iterations, errD_sum/display_iters, errG_sum/display_iters, errL1_sum/display_iters), time.time()-t0)
_, valA, valB = train_batch.send(6)
fakeB = netG_gen(valA)
showX(np.concatenate([valA, valB, fakeB], axis=0), 3)
errL1_sum = errG_sum = errD_sum = 0
_, valA, valB = next(val_batch)
fakeB = netG_gen(valA)
showX(np.concatenate([valA, valB, fakeB], axis=0), 3)
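In [ ]:
# Checkpointing (a sketch, not part of the original notebook; the filename is
# arbitrary): Lasagne keeps all weights in the layer graph, so np.savez together
# with get_all_param_values / set_all_param_values is enough to save and restore
# the trained generator.
np.savez('netG_weights.npz', *lasagne.layers.get_all_param_values(netG))
# to restore later:
# with np.load('netG_weights.npz') as f:
#     lasagne.layers.set_all_param_values(netG, [f['arr_%d' % i] for i in range(len(f.files))])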
In [ ]: