In [1]:
%pylab inline
In [2]:
from io import BytesIO
from PIL import Image as PIL_Image
import numpy as np
from IPython.display import display, Image
def display_img_array(ima, **kwargs):
    # Encode a numpy image as PNG in memory and show it inline.
    if ima.dtype == np.float32 or ima.dtype == np.float64:
        ima = (np.clip(ima, 0., 1.) * 255).astype(np.uint8)
    im = PIL_Image.fromarray(ima)
    bio = BytesIO()
    im.save(bio, format='png')
    display(Image(bio.getvalue(), format='png', **kwargs))
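As a quick, optional check of the helper (random pixels rather than anything from the notebook's data; the width value is arbitrary):

display_img_array(np.random.rand(28, 28).astype(np.float32), width=100)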
In [3]:
import os
from urllib.request import urlretrieve

dataset = 'mnist.pkl.gz'

def reporthook(a, b, c):
    # a: blocks transferred so far, b: block size in bytes, c: total file size.
    print("\rdownloading: %5.1f%%" % (a*b*100.0/c), end="")

if not os.path.isfile(dataset):
    origin = "https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz"
    print('Downloading data from %s' % origin)
    urlretrieve(origin, dataset, reporthook=reporthook)
In [4]:
import gzip
import pickle
# The pickle was written by Python 2, hence the latin1 encoding.
with gzip.open(dataset, 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
In [5]:
# Fold the validation split into the training set.
train_set = (np.concatenate([train_set[0], valid_set[0]]), np.concatenate([train_set[1], valid_set[1]]))
In [6]:
print("train_set", train_set[0].shape, train_set[1].shape)
print("test_set", test_set[0].shape, test_set[1].shape)
In [7]:
IMG_C, IMG_W, IMG_H = 1, 28, 28  # channels, width, height
In [8]:
def show(x):
    # Tile a batch of images horizontally and display it.
    x = x.reshape(-1, IMG_C, IMG_H, IMG_W)
    w = min(x.shape[0]*100, 800)
    x = x.swapaxes(0, 1).swapaxes(1, 2)
    if IMG_C == 1:
        img = x.reshape(IMG_H, -1)
    else:
        x = x.reshape(IMG_C, IMG_H, -1)
        img = np.swapaxes(x, 0, 1)
        img = np.swapaxes(img, 1, 2)
    display_img_array(img, width=w)

for i in range(3):
    show(train_set[0][i])
show(train_set[0][:10])
In [9]:
import sys
sys.setrecursionlimit(10000)
In [10]:
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import DenseLayer, DropoutLayer, ReshapeLayer, InputLayer, FlattenLayer, Upscale2DLayer, LocalResponseNormalization2DLayer
floatX = theano.config.floatX
In [11]:
from lasagne.layers.dnn import MaxPool2DDNNLayer, Conv2DDNNLayer
from lasagne.layers import TransposedConv2DLayer
In [12]:
from lasagne.layers import ElemwiseSumLayer, NonlinearityLayer, GlobalPoolLayer, ConcatLayer
from lasagne.nonlinearities import rectify
from lasagne.layers import get_output
def batch_norm(x):
    # DCGAN-style gamma initialization: gamma ~ N(mean=1, std=0.02).
    # lasagne.init.Normal takes (std, mean), hence the keyword arguments.
    return lasagne.layers.batch_norm(x, gamma=lasagne.init.Normal(std=0.02, mean=1.))
In [13]:
from lasagne.objectives import categorical_crossentropy, binary_crossentropy, categorical_accuracy, binary_accuracy
In [14]:
from lasagne.nonlinearities import LeakyRectify, rectify, sigmoid, linear, softmax
lRELU = LeakyRectify(0.01)
RELU = rectify
In [15]:
# All conv / deconv / dense layers share the DCGAN-style N(0, 0.02) weight init.
def conv2d(*args, **kwargs):
    return Conv2DDNNLayer(*args, W=lasagne.init.Normal(std=0.02), **kwargs)

def deconv2d(*args, **kwargs):
    return TransposedConv2DLayer(*args, W=lasagne.init.Normal(std=0.02), **kwargs)

def dense(*args, **kwargs):
    return DenseLayer(*args, W=lasagne.init.Normal(std=0.02), **kwargs)
In [16]:
input_Y = T.matrix()     # discrete latent code (one-hot over 10 categories)
input_C = T.matrix()     # continuous latent code (CU values)
input_Z = T.matrix()     # noise (NU values, sampled uniformly in [-1, 1] below)
input_X = T.matrix()     # image, flattened to IMG_C*IMG_H*IMG_W
target_var = T.matrix()  # discriminator target (unused below)
# Discriminator / recognition network: a shared convolutional trunk feeding a
# real-vs-fake head (l_discriminator) and Q heads for the categorical code
# (l_Q_Y) and the continuous code's Gaussian parameters (l_Q_C_mean, l_Q_C_logstddev).
_ = InputLayer(shape=(None, IMG_C*IMG_H*IMG_W), input_var=input_X)
_ = ReshapeLayer(_, ([0], IMG_C, IMG_H, IMG_W))
_ = conv2d(_, 64, 4, stride=2, nonlinearity=lRELU, pad=1)
_ = batch_norm(conv2d(_, 128, 4, stride=2, nonlinearity=lRELU, pad=1))
_ = batch_norm(dense(_, num_units=1024, nonlinearity=lRELU))
l_discriminator = dense(_, num_units=1, nonlinearity=sigmoid)
_ = batch_norm(dense(_, num_units=128, nonlinearity=lRELU))
l_Q_Y = dense(_, num_units=10, nonlinearity=softmax)
l_Q_C_mean = dense(_, num_units=2, nonlinearity=linear)
l_Q_C_logstddev = dense(_, num_units=2, nonlinearity=lambda x: T.maximum(x, -16))
NU = 64  # dim of noise
CU = 2   # dim of continuous latent variable
input_var2 = T.matrix()

# Generator: concatenate (Y, C, Z), project up with two dense layers, then
# upsample 7x7 -> 14x14 -> 28x28 with transposed convolutions.
_Y = InputLayer(shape=(None, 10), input_var=input_Y)
_C = InputLayer(shape=(None, CU), input_var=input_C)
_Z = InputLayer(shape=(None, NU), input_var=input_Z)
_ = ConcatLayer((_Y, _C, _Z))
_ = batch_norm(dense(_, num_units=1024, nonlinearity=RELU))
_ = batch_norm(dense(_, num_units=128*(IMG_H//4)*(IMG_W//4), nonlinearity=RELU))
_ = ReshapeLayer(_, ([0], 128, IMG_H//4, IMG_W//4))
_ = batch_norm(deconv2d(_, 64, 4, stride=2, nonlinearity=RELU, crop=1))
_ = batch_norm(deconv2d(_, 1, 4, stride=2, nonlinearity=RELU, crop=1))
l_generator = FlattenLayer(_)
def clip(x):
    return T.clip(x, 1e-8, 1-1e-8)
output_generator_deterministic = get_output(l_generator, deterministic=True)
output_discriminator = get_output(l_discriminator)  # , batch_norm_use_averages=True)
output_Q_Y = get_output(l_Q_Y)
output_generator = get_output(l_generator)
output_generator_discriminator = get_output(l_discriminator, inputs=output_generator,
                                            deterministic=False)
output_generator_Q_Y, output_generator_Q_C_mean, output_generator_Q_C_logstddev = \
    get_output((l_Q_Y, l_Q_C_mean, l_Q_C_logstddev),
               inputs=output_generator, deterministic=False)
TINY = 1e-8
# Standard (non-saturating) GAN losses plus the InfoGAN mutual-information terms.
loss_discriminator0 = -T.log(output_discriminator + TINY).mean() - T.log(1. - output_generator_discriminator + TINY).mean()
loss_generator0 = -T.log(output_generator_discriminator + TINY).mean()
loss_Q_Y = categorical_crossentropy(clip(output_generator_Q_Y), input_Y).mean()
# Gaussian negative log-likelihood of the continuous code (constant term dropped).
epsilon = (input_C - output_generator_Q_C_mean) / (T.exp(output_generator_Q_C_logstddev) + TINY)
loss_Q_C = (output_generator_Q_C_logstddev + 0.5 * T.square(epsilon)).mean()
loss_discriminator = loss_discriminator0 + loss_Q_Y + loss_Q_C
loss_generator = loss_generator0 + loss_Q_Y + loss_Q_C

accuracy_generator = binary_accuracy(output_generator_discriminator,
                                     T.zeros_like(output_generator_discriminator)).mean()
accuracy_discriminator = binary_accuracy(output_discriminator, T.ones_like(output_discriminator)).mean()
accuracy_discriminator = (accuracy_discriminator + accuracy_generator)/2
accuracy_Q_Y = categorical_accuracy(output_generator_Q_Y, input_Y).mean()
params_discriminator = lasagne.layers.get_all_params(
    [l_discriminator, l_Q_Y, l_Q_C_mean, l_Q_C_logstddev], trainable=True)
params_generator = lasagne.layers.get_all_params(l_generator, trainable=True)

updates_generator = lasagne.updates.adam(loss_generator, params_generator,
                                         learning_rate=1e-3, beta1=0.5)
updates_discriminator = lasagne.updates.adam(loss_discriminator, params_discriminator,
                                             learning_rate=2e-4, beta1=0.5)

train_generator_fn = theano.function([input_Y, input_C, input_Z],
                                     [loss_generator0, loss_Q_Y, loss_Q_C,
                                      accuracy_generator, accuracy_Q_Y],
                                     updates=updates_generator)
train_discriminator_fn = theano.function([input_X, input_Y, input_C, input_Z],
                                         [loss_discriminator0, loss_Q_Y, loss_Q_C,
                                          accuracy_discriminator, accuracy_Q_Y],
                                         updates=updates_discriminator)
generator_fn = theano.function([input_Y, input_C, input_Z], output_generator_deterministic)
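The loss_Q_C term above is the Gaussian negative log-likelihood of the sampled continuous code under the recognition network's predicted mean and log-stddev (with the constant 0.5*log(2*pi) dropped); it is the variational lower bound on mutual information that InfoGAN maximizes for continuous codes. A minimal numpy sketch of the same expression, using made-up values in place of the symbolic Theano variables:

c = np.random.uniform(-1, 1, size=(4, CU)).astype('float32')  # stand-in for input_C
mu = np.zeros((4, CU), dtype='float32')                       # stand-in for output_generator_Q_C_mean
logstd = np.full((4, CU), -1., dtype='float32')               # stand-in for output_generator_Q_C_logstddev
eps = (c - mu) / (np.exp(logstd) + TINY)
loss_q_c_numpy = (logstd + 0.5 * np.square(eps)).mean()       # mirrors loss_Q_C
print(loss_q_c_numpy)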
In [17]:
logf = open('mnist-infogan-v3.log', 'w')
import sys
def printx(*args, **kwargs):
    print(*args, **kwargs)
    print(*args, file=logf, **kwargs)
    logf.flush()
    sys.stdout.flush()
In [18]:
import sys
from random import randint
X = train_set[0]  # [train_set[1]==5]
Y = train_set[1].astype('int32')
X_test = test_set[0]
Y_test = test_set[1].astype('int32')
last_imgs = None
j = 0
batch_size = 128
In [19]:
def sample_X(batch_size=batch_size):
    # Random minibatch of real images.
    idx = np.random.randint(0, train_set[0].shape[0], size=batch_size)
    return train_set[0][idx]

def sample_Y(batch_size=batch_size):
    # Random one-hot categorical codes.
    input_Y = np.zeros((batch_size, 10), dtype='float32')
    random_y = np.random.randint(0, 10, size=batch_size)
    input_Y[np.arange(batch_size), random_y] = 1
    return input_Y

def sample_Z(batch_size=batch_size):
    # Noise, uniform in [-1, 1].
    return (np.random.uniform(low=-1, high=1, size=(batch_size, NU))).astype('float32')

def sample_C(batch_size=batch_size):
    # Continuous latent codes, uniform in [-1, 1].
    return (np.random.uniform(low=-1, high=1, size=(batch_size, CU))).astype('float32')
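A small, optional sanity check of the samplers (shapes, one-hot rows, and the [-1, 1] range):

y = sample_Y(8)
z = sample_Z(8)
c = sample_C(8)
assert y.shape == (8, 10) and np.allclose(y.sum(axis=1), 1)
assert z.shape == (8, NU) and z.min() >= -1 and z.max() <= 1
assert c.shape == (8, CU) and c.min() >= -1 and c.max() <= 1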
In [20]:
import scipy.stats
ppf = scipy.stats.norm.ppf
def show_result(N=16):
    # For each digit class j, sweep the two continuous codes over Gaussian
    # quantiles and display an N x N grid of generated samples.
    results = []
    for j in range(10):
        pic = None
        for x in range(N):
            C = np.array([[ppf(0.05*(x+1)), ppf(0.05*(y+1))] for y in range(N)], dtype='float32')
            Y = np.zeros((N, 10), dtype='float32')
            Y[:, j] = 1
            row = generator_fn(Y, C, sample_Z(N))
            row = row.reshape(-1, IMG_H, IMG_W)
            row = row.swapaxes(0, 1).reshape(IMG_H, -1)
            pic = row if pic is None else np.concatenate((pic, row), axis=0)
        results.append(pic)
    display_img_array(np.concatenate(results[:5], axis=1))
    display_img_array(np.concatenate(results[5:], axis=1))
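In the resulting grids, each 10-digit block fixes the categorical code to one class while the first continuous code varies down the rows and the second across the columns (both swept over Gaussian quantiles via ppf, with fresh noise per row), so the grids visualize what the two continuous codes have learned to control.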
In [21]:
for epoch in range(50):
    for j in range(100):
        d_err, qy_err, qc_err, d_acc, qy_acc = train_discriminator_fn(sample_X(), sample_Y(), sample_C(), sample_Z())
        g_err, qy_err2, qc_err2, g_acc, qy_acc2 = train_generator_fn(sample_Y(), sample_C(), sample_Z())
    printx("epoch=", epoch)
    show_result(10)
    printx("d_err", d_err, d_acc)
    printx("qy_err", qy_err, qy_acc)
    printx("g_err", g_err, g_acc)
    printx("qy_err", qy_err2, qy_acc2)
    printx("qc_err", qc_err, qc_err2)
In [22]:
Y = np.zeros((100, 10), dtype='float32')
Y[np.arange(100), np.arange(100)//10] = 1
C = np.array([[0.1 + 0.08*(i % 10)]*2 for i in range(100)], dtype='float32')
imgs = generator_fn(Y, C, sample_Z(100))
for i in range(0, 100, 10):
    show(imgs[i:i+10])
In [23]:
#np.savez('cifar10_gan_classifier_generator.npz', lasagne.layers.get_all_param_values(l_generator))
#np.savez('cifar10_gan_classifier_discriminator.npz', lasagne.layers.get_all_param_values(l_discriminator))
#np.savez('cifar10_gan_classifier_classifier.npz', lasagne.layers.get_all_param_values(l_classifier))
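The commented-out lines above still carry file names from a CIFAR-10 notebook (and there is no l_classifier here). If saving is wanted for this model, a sketch along the usual Lasagne lines, with arbitrary file names, would be:

np.savez('mnist_infogan_generator.npz', *lasagne.layers.get_all_param_values(l_generator))
np.savez('mnist_infogan_discriminator.npz',
         *lasagne.layers.get_all_param_values([l_discriminator, l_Q_Y, l_Q_C_mean, l_Q_C_logstddev]))

# Restore into a freshly built network of the same architecture:
with np.load('mnist_infogan_generator.npz') as f:
    values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(l_generator, values)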
In [24]:
show_result(10)