In [1]:
# %pylab injects numpy and matplotlib names into the global namespace
# (e.g. the bare `arange` used in later cells); the magic is deprecated in
# modern IPython — explicit imports are preferred, kept here for compatibility.
%pylab inline
In [2]:
from io import BytesIO
from PIL import Image as PIL_Image
import numpy as np
from IPython.display import display, Image
def display_img_array(ima, **kwargs):
    """Render a numpy image array inline as a PNG.

    Float arrays are assumed to be in [0, 1]: they are clipped and scaled
    to uint8 before encoding. Extra kwargs (e.g. width=) are forwarded to
    IPython's Image display object.
    """
    if ima.dtype in (np.float32, np.float64):
        ima = (np.clip(ima, 0.0, 1.0) * 255).astype(np.uint8)
    buf = BytesIO()
    PIL_Image.fromarray(ima).save(buf, format='png')
    display(Image(buf.getvalue(), format='png', **kwargs))
In [3]:
import os
import urllib
from urllib.request import urlretrieve
# local cache path for the MNIST pickle archive
dataset = 'mnist.pkl.gz'
def reporthook(a, b, c):
    """Progress callback for urlretrieve.

    a: number of blocks transferred so far
    b: block size in bytes
    c: total file size in bytes; urlretrieve passes a value <= 0 when the
       server does not report a Content-Length.
    """
    if c > 0:
        # clamp: the final block usually overshoots the total, giving >100%
        pct = min(a * b * 100.0 / c, 100.0)
        print("\rdownloading: %5.1f%%" % pct, end="")
    else:
        # unknown total size — report raw byte count instead of a percentage
        print("\rdownloading: %d bytes" % (a * b), end="")
# download once; skipped on re-runs when the file is already cached locally
if not os.path.isfile(dataset):
    origin = "https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz"
    print('Downloading data from %s' % origin)
    urlretrieve(origin, dataset, reporthook=reporthook)
In [4]:
import gzip
import pickle
# latin1 encoding is required to unpickle this Python-2-era MNIST pickle
with gzip.open(dataset, 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
In [5]:
# each split is (images, labels): (n, 784) float images and (n,) int labels
print("train_set", train_set[0].shape, train_set[1].shape)
print("test_set", test_set[0].shape, test_set[1].shape)
In [6]:
# image channels / width / height (MNIST: 1 x 28 x 28 grayscale)
IMG_C, IMG_W, IMG_H=1,28,28
In [7]:
def show(x):
    """Tile a batch of flattened images into one horizontal strip and display it.

    x: array of one or more flattened images; reshaped to (batch, C, H, W).
    """
    x = x.reshape(-1, IMG_C, IMG_H, IMG_W)
    # display width: 100 px per image, capped at 800 px
    w = min(x.shape[0]*100, 800)
    # (batch, C, H, W) -> (C, H, batch, W)
    x = x.swapaxes(0, 1).swapaxes(1,2)
    if IMG_C == 1:
        # grayscale: concatenate images side by side into (H, batch*W)
        img = x.reshape(IMG_H, -1)
    else:
        # color: (C, H, batch*W) -> (H, batch*W, C), channel-last for PIL
        x = x.reshape(IMG_C, IMG_H, -1)
        img = np.swapaxes(x, 0, 1)
        img = np.swapaxes(img, 1, 2)
    display_img_array(img, width=w)
# sanity check: show the first three digits and a strip of ten
for i in range(3):
    show(train_set[0][i])
show(train_set[0][:10])
In [8]:
import sys
# deep Theano graphs can exceed the default recursion limit when (un)pickling
sys.setrecursionlimit(10000)
In [9]:
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import DenseLayer, DropoutLayer, ReshapeLayer, InputLayer, FlattenLayer, Upscale2DLayer, LocalResponseNormalization2DLayer
floatX = theano.config.floatX  # default float dtype for Theano shared data
In [10]:
from lasagne.layers.dnn import MaxPool3DDNNLayer, Conv3DDNNLayer, MaxPool2DDNNLayer, Conv2DDNNLayer
In [11]:
from lasagne.layers import batch_norm, ElemwiseSumLayer, NonlinearityLayer, GlobalPoolLayer, ConcatLayer
from lasagne.nonlinearities import rectify
from lasagne.layers import get_output
In [12]:
from lasagne.objectives import categorical_crossentropy, binary_crossentropy, categorical_accuracy, binary_accuracy
In [13]:
# Symbolic (placeholder) inputs for the compiled Theano functions below
input_Y = T.matrix() # discrete latent variable (1 10-category)
input_C = T.matrix() # continuous latent variable (3 gaussian)
input_Z = T.matrix() # noise (NU gaussian)
input_X = T.matrix() # image
target_var = T.matrix() # discriminator target
def conv(*args, **kwargs):
    """Same-padded convolution with ReLU and He init, wrapped in batch norm.

    NOTE(review): this helper appears unused in the rest of the notebook.
    """
    inner = Conv2DDNNLayer(*args, pad='same', nonlinearity=rectify,
                           W=lasagne.init.HeNormal(), **kwargs)
    return batch_norm(inner)
# ---- Discriminator + InfoGAN Q heads (shared convolutional trunk) ----
_ = InputLayer(shape=(None, IMG_C*IMG_H*IMG_W), input_var=input_X)
_ = ReshapeLayer(_, ([0], IMG_C, IMG_H, IMG_W))
_ = batch_norm(Conv2DDNNLayer(_, 64, 3, pad='same'))
_ = batch_norm(Conv2DDNNLayer(_, 64, 3, pad='same'))
_ = MaxPool2DDNNLayer(_, 2)
_ = batch_norm(Conv2DDNNLayer(_, 128, 3, pad='same'))
_ = MaxPool2DDNNLayer(_, 2)
_ = batch_norm(Conv2DDNNLayer(_, 256, 3, pad='same'))
_ = batch_norm(Conv2DDNNLayer(_, 64, 3, pad='same'))
_ = FlattenLayer(_)
_ = DenseLayer(_, num_units=1000, nonlinearity=lasagne.nonlinearities.rectify)
# real/fake probability head
l_discriminator = DenseLayer(_, num_units=1, nonlinearity=lasagne.nonlinearities.sigmoid)
# Q head recovering the 10-way categorical latent code
l_Q_Y = DenseLayer(_, num_units=10, nonlinearity=lasagne.nonlinearities.softmax)
# Q head for continuous latent codes — NOTE(review): defined but not used below
l_Q_C = DenseLayer(_, num_units=4, nonlinearity=lasagne.nonlinearities.linear)
# ---- Generator network ----
NU = 32 # dim of noise
input_var2 = T.matrix()  # NOTE(review): defined but apparently unused below
_Y = InputLayer(shape=(None, 10), input_var=input_Y)
#_C = InputLayer(shape=(None, 3), input_var=input_C)
_Z = InputLayer(shape=(None, NU), input_var=input_Z)
# generator input: one-hot class code concatenated with the noise vector
_ = ConcatLayer((_Y, _Z))
_ = batch_norm(DenseLayer(_, num_units=1000, nonlinearity=lasagne.nonlinearities.rectify))
# project to a 7x7 feature map (28//4), then upscale twice back to 28x28
_ = batch_norm(DenseLayer(_, num_units=64*(IMG_H//4)*(IMG_W//4), nonlinearity=lasagne.nonlinearities.rectify))
_ = ReshapeLayer(_, ([0], 64, IMG_H//4, IMG_W//4))
_ = batch_norm(Conv2DDNNLayer(_, 128, 3, pad='same'))
_ = Upscale2DLayer(_, 2)
_ = batch_norm(Conv2DDNNLayer(_, 128, 3, pad='same'))
_ = Upscale2DLayer(_, 2)
_ = batch_norm(Conv2DDNNLayer(_, 128, 3, pad='same'))
_ = batch_norm(Conv2DDNNLayer(_, 128, 3, pad='same'))
# NOTE(review): output nonlinearity is ReLU rather than the more common
# sigmoid for [0,1] images; display clips values, but confirm this is intended
_ = batch_norm(Conv2DDNNLayer(_, IMG_C, 3, pad='same', nonlinearity=lasagne.nonlinearities.rectify))
l_generator = FlattenLayer(_)
def clip(x):
    """Clamp probabilities into the open interval (0, 1) so that the
    cross-entropy losses never evaluate log(0)."""
    eps = 1e-7
    return T.clip(x, eps, 1 - eps)
# deterministic (inference-mode BN) generator output, used for visualization
output_generator_deterministic = get_output(l_generator, deterministic=True)
#
# ---- Symbolic outputs ----
output_discriminator = get_output(l_discriminator) #, batch_norm_use_averages=True)
output_Q_Y = get_output(l_Q_Y)
output_generator = get_output(l_generator)
# discriminator / Q evaluated on generated images (weights shared, BN deterministic)
output_generator_discriminator = get_output(l_discriminator, inputs=output_generator, deterministic=True)
output_generator_Q_Y = get_output(l_Q_Y, inputs=output_generator, deterministic=True)
# ---- Losses ----
# standard GAN discriminator loss on a mixed real/fake batch
loss_discriminator0 = loss_discriminator = binary_crossentropy(clip(output_discriminator), target_var).mean()
# generator tries to push the discriminator output toward "real" (1) on fakes
loss_generator0 = loss_generator = binary_crossentropy(clip(output_generator_discriminator), T.ones_like(output_generator_discriminator)).mean()
# InfoGAN mutual-information surrogate: Q must recover the categorical code
loss_generator_Q_Y = categorical_crossentropy(clip(output_generator_Q_Y), input_Y).mean()
loss_discriminator_Q_Y = loss_generator_Q_Y
# small weight for the Q term on the discriminator side
loss_discriminator += 0.01 *loss_discriminator_Q_Y
loss_generator += loss_generator_Q_Y
# ---- Metrics ----
accuracy_discriminator = binary_accuracy(output_discriminator, target_var).mean()
# fraction of fakes the discriminator correctly labels as fake (0)
accuracy_generator = binary_accuracy(output_generator_discriminator,
                                     T.zeros_like(output_generator_discriminator)).mean()
accuracy_generator_Q_Y = categorical_accuracy(output_generator_Q_Y, input_Y).mean()
# ---- Updates: Adam with beta1=0.5 (DCGAN-style), generator lr 2x discriminator ----
params_discriminator = lasagne.layers.get_all_params([l_discriminator, l_Q_Y], trainable=True)
params_generator = lasagne.layers.get_all_params(l_generator, trainable=True)
updates_generator = lasagne.updates.adam(loss_generator,
                                         params_generator,
                                         learning_rate=4e-4, beta1=0.5)
updates_discriminator = lasagne.updates.adam(loss_discriminator,
                                             params_discriminator,
                                             learning_rate=2e-4, beta1=0.5)
# ---- Compiled training / sampling functions ----
train_generator_fn = theano.function([input_Y, input_Z],
                                     (loss_generator0, loss_generator_Q_Y,
                                      accuracy_generator, accuracy_generator_Q_Y),
                                     updates=updates_generator)
train_discriminator_fn = theano.function([input_X, input_Y, input_Z, target_var],
                                         (loss_discriminator0, loss_generator_Q_Y,
                                          accuracy_discriminator, accuracy_generator_Q_Y),
                                         updates=updates_discriminator)
generator_fn = theano.function([input_Y, input_Z], output_generator_deterministic)
In [14]:
logf = open('mnist-infogan.log', 'w')
import sys
def printx(*args, **kwargs):
    """print() that also mirrors its output to the log file, flushing both."""
    print(*args, **kwargs)
    print(*args, file=logf, **kwargs)
    logf.flush()
    sys.stdout.flush()
In [15]:
import sys
from random import randint
# training images (rows of 784 floats) and integer labels
X=train_set[0] #[train_set[1]==5]
Y=train_set[1].astype('int32')
X_test = test_set[0]
Y_test = test_set[1].astype('int32')
last_imgs = None
# j is the training-step counter; kept at module level so re-running the
# training cell resumes from where it stopped
j = 0
batch_size=256
In [16]:
def sample_X(batch_size=batch_size):
    """Sample a random minibatch of real training images (with replacement)."""
    rows = np.random.randint(0, X.shape[0], size=batch_size)
    return X[rows]
def sample_Y(batch_size=batch_size):
    """Draw one-hot label codes uniformly from the 10 digit classes."""
    onehot = np.zeros((batch_size, 10), dtype='float32')
    labels = np.random.randint(0, 10, size=batch_size)
    onehot[np.arange(batch_size), labels] = 1
    return onehot
def sample_Z(batch_size=batch_size):
    """Draw standard-normal noise vectors of dimension NU as float32."""
    noise = np.random.normal(size=(batch_size, NU))
    return noise.astype('float32')
In [17]:
# Main adversarial training loop; starts from the persisted counter j so the
# cell can be interrupted and re-run to resume training.
for j in range(j, 100*100):
    # ---- discriminator phase: one update on a mixed real/fake batch ----
    x = sample_X()
    x_fake = generator_fn(sample_Y(), sample_Z())
    is_real = np.random.randint(0,2,size=batch_size)
    # interleave real and generated samples according to the is_real mask
    x_mixed = np.array([x[i] if is_real[i] else x_fake[i] for i in range(batch_size)], dtype='float32')
    is_real = is_real.reshape((-1,1)).astype('float32')
    d_err, q_err, d_acc, q_acc = train_discriminator_fn(x_mixed, sample_Y(), sample_Z(), is_real)
    #print("generator phase")
    # ---- generator phase: two updates per discriminator update ----
    for __ in range(2):
        g_err, q_err2, g_acc, q_acc2 = train_generator_fn(sample_Y(), sample_Z())
    if j%100==0:
        # periodic progress report: render 10 samples per digit class
        printx("j=", j)
        # NOTE(review): this rebinds the label array Y from the data-setup cell;
        # Y is not reused as labels afterwards, but a distinct name would be safer
        Y=np.zeros((100,10),dtype='float32')
        # one-hot: rows 0-9 are class 0, 10-19 class 1, ... (`arange` from %pylab)
        Y[arange(100), arange(100)//10]=1
        imgs = generator_fn(Y, sample_Z(100))
        for i in range(0, 100, 20):
            show(imgs[i:i+20])
        printx("d_err", d_err, d_acc)
        printx("q_err", q_err, q_acc)
        printx("g_err", g_err, g_acc)
        printx("q_err", q_err2, q_acc2)
In [18]:
# Final sample grid: each displayed strip shows 10 samples of one digit class
Y=np.zeros((100,10),dtype='float32')
Y[arange(100), arange(100)//10]=1
imgs = generator_fn(Y, sample_Z(100))
for i in range(0, 100, 10):
    show(imgs[i:i+10])
In [19]:
#np.savez('cifar10_gan_classifier_generator.npz', lasagne.layers.get_all_param_values(l_generator))
#np.savez('cifar10_gan_classifier_discriminator.npz', lasagne.layers.get_all_param_values(l_discriminator))
#np.savez('cifar10_gan_classifier_classifier.npz', lasagne.layers.get_all_param_values(l_classifier))
In [20]:
import scipy.stats
ppf = scipy.stats.norm.ppf

# Sweep the first two noise dimensions over a grid of normal quantiles and
# render the generated digits (remaining NU-2 noise dims held at 0).
# BUG FIX: generator_fn is compiled over (input_Y, input_Z); the original
# code called it with only z, which raises at runtime. A fixed one-hot class
# label is now supplied for every grid cell.
pic = None
N = 16
DIGIT = 0  # digit class rendered across the grid; change to inspect others
Y_fixed = np.zeros((N, 10), dtype=theano.config.floatX)
Y_fixed[:, DIGIT] = 1
for x in range(N):
    # quantiles 0.05, 0.10, ..., 0.80 along each axis (original spacing kept;
    # NOTE(review): (x+1)/(N+1) would cover the distribution symmetrically)
    z = np.asarray([[ppf(0.05*(x+1)), ppf(0.05*(y+1))] + [0]*(NU-2)
                    for y in range(N)], dtype=theano.config.floatX)
    row = generator_fn(Y_fixed, z).reshape(-1, IMG_H, IMG_W)
    # (N, H, W) -> (H, N*W): lay the row's images out side by side
    row = row.swapaxes(0, 1).reshape(IMG_H, -1)
    pic = row if pic is None else np.concatenate((pic, row), axis=0)
display_img_array(pic)
In [ ]: