Understanding VAEs

WNixalo – 2018/6/18

Going through the code in Intuitively Understanding Variational Autoencoders


In [1]:
%matplotlib inline

In [6]:
import numpy as np
import keras
import keras.backend as K
from keras.layers import Lambda

In [3]:
from keras.layers import Input, Dense
from keras.models import Model

# this is the size of our encoded representations
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

# this is our input placeholder
input_img = Input(shape=(784,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_img)
# # "decoded" is the lossy reconstruction of the input
# decoded = Dense(784, activation='sigmoid')(encoded)

# # this model maps an input to its reconstruction
# autoencoder = Model(input_img, decoded)

# # this model maps an input to its encoded representation
# encoder = Model(input_img, encoded)

# # create a placeholder for an encoded (32-dimensional) input
# encoded_input = Input(shape=(encoding_dim,))
# # retrieve the last layer of the autoencoder model
# decoder_layer = autoencoder.layers[-1]
# # create the decoder model
# decoder = Model(encoded_input, decoder_layer(encoded_input))

# autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

In [ ]:
# from keras.datasets import mnist
# import numpy as np
# (x_train, _), (x_test, _) = mnist.load_data()

# x_train = x_train.astype('float32') / 255.
# x_test = x_test.astype('float32') / 255.
# x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
# x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
# print(x_train.shape)
# print(x_test.shape)

# autoencoder.fit(x_train, x_train, epochs=50, batch_size=256,
#                 shuffle=True, validation_data=(x_test, x_test))

# # encode and decode some digits
# # note that we take them from the *test* set
# encoded_imgs = encoder.predict(x_test)
# decoded_imgs = decoder.predict(encoded_imgs)

In [4]:
hidden = encoded

In [39]:
# build your encoder up to here. It can simply be a stack of dense layers, a convolutional
# network, or even an LSTM encoder. Once built, flatten the final layer of the encoder and call it hidden.

# we use Keras to build the graph

latent_size = 5
mean = Dense(latent_size)(hidden)

# we usually don't directly compute the stddev σ,
# but the log of the stddev instead: log(σ).
# the reasoning is similar to why we use softmax: rather than forcing the network to
# output numbers in a fixed range directly, we let it output any real number and
# squash that into the valid range afterwards (here exp(log σ) guarantees σ > 0)
log_stddev = Dense(latent_size)(hidden)

def sampler(args):
    mean, log_stddev = args
    # we sample from the standard normal a matrix of batch_size * latent_size (taking into account minibatches)
    std_norm = K.random_normal(shape=(K.shape(mean)[0], latent_size), mean=0, stddev=1)
    # sampling from Z~N(μ, σ^2) is the same as sampling from μ + σX, X~N(0,1)
    return mean + K.exp(log_stddev) * std_norm
  
latent_vector = Lambda(sampler)([mean, log_stddev])
# pass latent_vector as input to decoder layers
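
One way to finish the graph from here, sketched minimally (the decoder widths and the Model wiring are my own filler, not from the article):

In [ ]:
# hypothetical decoder: mirror the encoder back out to 784 pixels
decoder_hidden = Dense(32, activation='relu')(latent_vector)
reconstruction = Dense(784, activation='sigmoid')(decoder_hidden)
# end-to-end VAE: input image -> latent sample -> reconstructed image
vae_model = Model(input_img, reconstruction)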

In [32]:
def vae_loss(input_img, output):
    # reconstruction loss: sum the squared error over the 784 pixels of each image
    reconstruction_loss = K.sum(K.square(output - input_img), axis=-1)
    # KL divergence between N(μ, σ²) and N(0, 1): -0.5 * Σ(1 + log σ² - μ² - σ²);
    # since log_stddev = log σ, we have log σ² = 2·log_stddev and σ² = exp(2·log_stddev)
    kl_loss = - 0.5 * K.sum(1 + 2*log_stddev - K.square(mean) - K.exp(2*log_stddev), axis=-1)
    # return the average loss over all images in the batch
    total_loss = K.mean(reconstruction_loss + kl_loss)
    return total_loss
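
With the vae_model sketched above, compiling against this loss is one line; the optimizer choice here is mine, not the article's:

In [ ]:
# Keras passes (y_true, y_pred) into the loss function; mean and log_stddev
# are picked up from the enclosing graph via closure
vae_model.compile(optimizer='adam', loss=vae_loss)
# vae_model.fit(x_train, x_train, epochs=10, batch_size=256)  # once MNIST is loaded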

A second pass, this time building a complete VAE end to end (this mirrors the VAE section of the Keras blog's Building Autoencoders in Keras):

In [27]:
batch_size = 16
original_dim = 784
intermediate_dim = 32
latent_dim = 16
epsilon_std = 0.01

In [28]:
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_sigma = Dense(latent_dim)(h)

In [33]:
def sampling(args):
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_sigma) * epsilon

# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_sigma])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])

In [34]:
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

In [35]:
# end-to-end autoencoder
vae = Model(x, x_decoded_mean)

# encoder, from inputs to latent space
encoder = Model(x, z_mean)

# generator, from latent space to reconstructed inputs
decoder_input = Input(shape=(latent_dim,))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)
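
Once the VAE below is trained, the three models split the work; a hypothetical usage sketch (x_test and the random draw are assumptions, not part of the original cells):

In [ ]:
# x_test_encoded = encoder.predict(x_test, batch_size=batch_size)  # images -> latent means
# z_sample = np.random.normal(size=(1, latent_dim))                # random point in latent space
# x_generated = generator.predict(z_sample)                        # latent point -> 784-pixel image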

In [36]:
from keras.losses import binary_crossentropy

def vae_loss(x, x_decoded_mean):
    # binary_crossentropy returns the mean over the 784 pixels, so scale it back up
    # by original_dim to keep it comparable in magnitude to the KL term
    xent_loss = original_dim * binary_crossentropy(x, x_decoded_mean)
    # KL divergence; z_log_sigma is log σ, so log σ² = 2·z_log_sigma
    kl_loss = - 0.5 * K.mean(1 + 2*z_log_sigma - K.square(z_mean) - K.exp(2*z_log_sigma), axis=-1)
    return xent_loss + kl_loss

vae.compile(optimizer='rmsprop', loss=vae_loss)
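
Training the compiled VAE would look something like this (MNIST loading follows the commented cell near the top; the epoch count is arbitrary):

In [ ]:
# from keras.datasets import mnist
# (x_train, _), (x_test, _) = mnist.load_data()
# x_train = x_train.reshape((len(x_train), original_dim)).astype('float32') / 255.
# x_test  = x_test.reshape((len(x_test), original_dim)).astype('float32') / 255.
# vae.fit(x_train, x_train, shuffle=True, epochs=50, batch_size=batch_size,
#         validation_data=(x_test, x_test))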


