WNixalo – 2018/6/18
Going through the code in "Intuitively Understanding Variational Autoencoders"
In [1]:
%matplotlib inline
In [6]:
import numpy as np
import keras
import keras.backend as K
from keras import objectives  # needed for binary_crossentropy in the VAE loss below
from keras.layers import Lambda
In [3]:
from keras.layers import Input, Dense
from keras.models import Model
# this is the size of our encoded representations
encoding_dim = 32 # 32 floats -> compression of factor 24.5, assuming the input is 784 floats
# this is our input placeholder
input_img = Input(shape=(784,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_img)
# # "decoded" is the lossy reconstruction of the input
# decoded = Dense(784, activation='sigmoid')(encoded)
# # this model maps an input to its reconstruction
# autoencoder = Model(input_img, decoded)
# # this model maps an input to its encoded representation
# encoder = Model(input_img, encoded)
# # create a placeholder for an encoded (32-dimensional) input
# encoded_input = Input(shape=(encoding_dim,))
# # retrieve the last layer of the autoencoder model
# decoder_layer = autoencoder.layers[-1]
# # create the decoder model
# decoder = Model(encoded_input, decoder_layer(encoded_input))
# autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
In [ ]:
# from keras.datasets import mnist
# import numpy as np
# (x_train, _), (x_test, _) = mnist.load_data()
# x_train = x_train.astype('float32') / 255.
# x_test = x_test.astype('float32') / 255.
# x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
# x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
# print(x_train.shape)
# print(x_test.shape)
# autoencoder.fit(x_train, x_train, epochs=50, batch_size=256,
#                 shuffle=True, validation_data=(x_test, x_test))
# # encode and decode some digits
# # note that we take them from the *test* set
# encoded_imgs = encoder.predict(x_test)
# decoded_imgs = decoder.predict(encoded_imgs)
In [4]:
hidden = encoded
In [39]:
# build your encoder up to here. It can simply be a series of dense layers, a convolutional
# network, or even an LSTM encoder. Once made, flatten out the final layer of the encoder and call it hidden.
# we use Keras to build the graph
latent_size = 5
mean = Dense(latent_size)(hidden)
# we usually don't directly compute the stddev σ, but the log of the stddev
# instead, i.e. log(σ). The reasoning is similar to why we use softmax:
# instead of directly outputting numbers in a fixed range like [0, 1], the
# network can output a wider, unconstrained range of numbers, which we later
# squash into the valid range (here, exp maps log(σ) to a positive σ)
log_stddev = Dense(latent_size)(hidden)
def sampler(args):
    mean, log_stddev = args
    # sample a batch_size x latent_size matrix from the standard normal (taking minibatches into account)
    std_norm = K.random_normal(shape=(K.shape(mean)[0], latent_size), mean=0, stddev=1)
    # sampling from Z ~ N(μ, σ^2) is the same as sampling μ + σX with X ~ N(0, 1);
    # this is the reparameterization trick: the randomness lives in X, so
    # gradients can still flow through mean and log_stddev
    return mean + K.exp(log_stddev) * std_norm
latent_vector = Lambda(sampler)([mean, log_stddev])
# pass latent_vector as input to decoder layers
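A minimal decoder sketch (not from the original post): assuming the 784-dim flattened MNIST inputs above, two dense layers map latent_vector back to pixel space. The hidden width (encoding_dim) is an illustrative assumption.
In [ ]:
# hedged sketch: a simple dense decoder fed by latent_vector.
# it mirrors the one-layer encoder above; sizes are assumptions, not from the article
decoder_hidden = Dense(encoding_dim, activation='relu')(latent_vector)
output_img = Dense(784, activation='sigmoid')(decoder_hidden)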
In [32]:
def vae_loss(input_img, output):
    # compute the per-image reconstruction error: squared error summed over all 784 pixels
    reconstruction_loss = K.sum(K.square(output - input_img), axis=-1)
    # compute the KL loss: KL(N(μ, σ²) || N(0, 1)) = -0.5 * (1 + log σ² - μ² - σ²);
    # since log_stddev is log σ, log σ² = 2 * log_stddev and σ² = K.square(K.exp(log_stddev))
    kl_loss = -0.5 * K.sum(1 + 2 * log_stddev - K.square(mean) - K.square(K.exp(log_stddev)), axis=-1)
    # return the average loss over all images in the batch
    total_loss = K.mean(reconstruction_loss + kl_loss)
    return total_loss
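Wiring the pieces into a trainable model might look like the sketch below (assuming the decoder sketch above that produced output_img). Note that vae_loss closes over the mean and log_stddev tensors defined earlier, so it can be passed directly to compile.
In [ ]:
# hedged sketch: compile an end-to-end VAE with the custom loss above.
# the optimizer choice is an assumption; any Keras optimizer works here
vae_model = Model(input_img, output_img)
vae_model.compile(optimizer='adam', loss=vae_loss)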
In [27]:
batch_size = 16
original_dim = 784
intermediate_dim = 32
latent_dim = 16
epsilon_std = 0.01
In [28]:
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_sigma = Dense(latent_dim)(h)
In [33]:
def sampling(args):
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_sigma) * epsilon
# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
In [34]:
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
In [35]:
# end-to-end autoencoder
vae = Model(x, x_decoded_mean)
# encoder, from inputs to latent space
encoder = Model(x, z_mean)
# generator, from latent space to reconstructed inputs
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)
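Once trained, the generator can decode arbitrary points from the latent prior N(0, I); a quick usage sketch:
In [ ]:
# hedged usage sketch: decode one random latent vector into a 784-dim "image"
random_latent = np.random.normal(size=(1, latent_dim))
generated = generator.predict(random_latent)
print(generated.shape)  # (1, 784)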
In [36]:
def vae_loss(x, x_decoded_mean):
    xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
    # KL(N(μ, σ²) || N(0, 1)); z_log_sigma is log σ (as in sampling above),
    # so log σ² = 2 * z_log_sigma and σ² = exp(2 * z_log_sigma)
    kl_loss = -0.5 * K.mean(1 + 2 * z_log_sigma - K.square(z_mean) - K.exp(2 * z_log_sigma), axis=-1)
    return xent_loss + kl_loss
vae.compile(optimizer='rmsprop', loss=vae_loss)
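A training sketch (assumed, following the commented-out MNIST cell earlier): because x was built with batch_shape=(batch_size, original_dim), the sample counts must be divisible by batch_size (60000 and 10000 both are for batch_size=16). The epoch count is illustrative.
In [ ]:
# hedged training sketch: flattened MNIST scaled to [0, 1], fit input -> input
from keras.datasets import mnist
(x_train, _), (x_test, _) = mnist.load_data()
x_train = x_train.astype('float32').reshape(len(x_train), original_dim) / 255.
x_test = x_test.astype('float32').reshape(len(x_test), original_dim) / 255.
vae.fit(x_train, x_train, shuffle=True, epochs=5, batch_size=batch_size,
        validation_data=(x_test, x_test))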