My Session 3


In [1]:
# First check the Python version
import sys
if sys.version_info < (3,4):
    print('You are running an older version of Python!\n\n' \
          'You should consider updating to Python 3.4.0 or ' \
          'higher as the libraries built for this course ' \
          'have only been tested in Python 3.4 and higher.\n')
    print('Try installing the Python 3.5 version of anaconda '
          'and then restart `jupyter notebook`:\n' \
          'https://www.continuum.io/downloads\n\n')

# Now get necessary libraries
try:
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from skimage.transform import resize
    from skimage import data
    from scipy.misc import imresize
    import IPython.display as ipyd
except ImportError:
    print('You are missing some packages! ' \
          'We will try installing them before continuing!')
    !pip install "numpy>=1.11.0" "matplotlib>=1.5.1" "scikit-image>=0.11.3" "scikit-learn>=0.17" "scipy>=0.17.0"
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from skimage.transform import resize
    from skimage import data
    from scipy.misc import imresize
    import IPython.display as ipyd
    print('Done!')

# Import Tensorflow
try:
    import tensorflow as tf
except ImportError:
    print("You do not have tensorflow installed!")
    print("Follow the instructions on the following link")
    print("to install tensorflow before continuing:")
    print("")
    print("https://github.com/pkmital/CADL#installation-preliminaries")

# This cell includes the provided libraries from the zip file
# and a library for displaying images from ipython, which
# we will use to display the gif
try:
    from libs import utils, gif, datasets, dataset_utils, vae, dft, tboard
except ImportError:
    print("Make sure you have started notebook in the same directory" +
          " as the provided zip file which includes the 'libs' folder" +
          " and the file 'utils.py' inside of it.  You will NOT be able"
          " to complete this assignment unless you restart jupyter"
          " notebook inside the directory created by extracting"
          " the zip file or cloning the github repo.")

# We'll tell matplotlib to inline any drawn figures like so:
%matplotlib inline
plt.style.use('ggplot')

Part One - Autoencoders


In [2]:
def crop_edge(img, cropped_rate):
    """Crop arbitrary amount of pixel.
    """
    row_i = int(img.shape[0] * cropped_rate) // 2
    col_i = int(img.shape[1] * cropped_rate) // 2
    return img[row_i:-row_i, col_i:-col_i]

In [3]:
# See how this works w/ Celeb Images or try your own dataset instead:
dirname = '../data/pokemon/jpeg/'

# Load every image file in the provided directory
filenames = [os.path.join(dirname, fname)
             for fname in os.listdir(dirname)]
# imgs = [plt.imread(fname)[..., :3] for fname in filenames]

imgs = [imresize(crop_edge(plt.imread(f), 0.4), (64, 64)) for f in filenames]
imgs = np.array(imgs)

# Shuffle and limit the number of files to process
nb_clip = 100
np.random.shuffle(imgs)
imgs = imgs[:nb_clip]

# Then convert the list of images to a 4d array (e.g. use np.array to convert a list to a 4d array):
Xs = imgs.reshape(-1, 64, 64, 3 )

print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)


(100, 64, 64, 3)

In [4]:
ds = datasets.Dataset(Xs)

Normalization


In [5]:
mean_img = ds.mean().astype(np.uint8)
plt.imshow(mean_img)
# If your image comes out entirely black, try w/o the `astype(np.uint8)`
# that means your images are read in as 0-255, rather than 0-1 and 
# this simply depends on the version of matplotlib you are using.


Out[5]:
<matplotlib.image.AxesImage at 0x7ff784b63da0>

In [6]:
std_img = ds.std()
plt.imshow(std_img)
print(std_img.shape)


(64, 64, 3)

In [7]:
std_img = np.mean(std_img, axis=2)
plt.imshow(std_img)


Out[7]:
<matplotlib.image.AxesImage at 0x7ff73d6beef0>

In [8]:
plt.imshow(ds.X[0])
print(ds.X.shape)


(100, 64, 64, 3)

In [9]:
plt.imshow(ds.X[0].mean(axis=2))


Out[9]:
<matplotlib.image.AxesImage at 0x7ff73d5a6240>

In [10]:
plt.imshow(ds.X[0].std(axis=2))


Out[10]:
<matplotlib.image.AxesImage at 0x7ff73d58fbe0>

Preprocess and deprocess


In [11]:
# Write a function to preprocess/normalize an image, given its dataset object
# (which stores the mean and standard deviation!)
def preprocess(img, ds):
    norm_img = (img - ds.mean()) / ds.std()
    return norm_img

# Write a function to undo the normalization of an image, given its dataset object
# (which stores the mean and standard deviation!)
def deprocess(norm_img, ds):
    img = norm_img * ds.std() + ds.mean()
    return img

In [12]:
nb_features = len(ds.X[0].flatten())
print(nb_features)


12288

In [13]:
64*64*3


Out[13]:
12288

In [14]:
encoder_dimensions = [1024, 64, 4]

Graph time


In [15]:
tf.reset_default_graph()

In [16]:
X = tf.placeholder(np.float32, [None, nb_features])
                   
assert(X.get_shape().as_list() == [None, nb_features])

Encoder


In [17]:
def encode(X, dimensions, activation=tf.nn.tanh):
    # We're going to keep every matrix we create so let's create a list to hold them all
    Ws = []
    # We'll create a for loop to create each layer:
    for layer_i, nb_output in enumerate(dimensions):

        # TODO: just like in the last session,
        # we'll use a variable scope to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.  Make sure it is a unique name
        # for each layer, e.g., 'encoder/layer1', 'encoder/layer2', or
        # 'encoder/1', 'encoder/2',... 
        with tf.variable_scope('encode/layer{}'.format(layer_i)):

            # TODO: Create a weight matrix which will increasingly reduce
            # down the amount of information in the input by performing
            # a matrix multiplication.  You can use the utils.linear function.
            h, W = utils.linear(X, nb_output, activation=activation)
            
            # TODO: Apply an activation function (unless you used the parameter
            # for activation function in the utils.linear call)

            # Finally we'll store the weight matrix.
            # We need to keep track of all
            # the weight matrices we've used in our encoder
            # so that we can build the decoder using the
            # same weight matrices.
            Ws.append(W)
            
            # Replace X with the current layer's output, so we can
            # use it in the next layer.
            X = h
            print("layer/{}/X".format(layer_i))
            print(X.get_shape())

    z = X
    print("enode/global/X/")
    print(X.get_shape())
    return Ws, z

In [18]:
# Then call the function
Ws, Z = encode(X, encoder_dimensions)

# And just some checks to make sure you've done it right.
# assert(Z.get_shape().as_list() == [None, 2])
# assert(len(Ws) == len(encoder_dimensions))


layer/0/X
(?, 1024)
layer/1/X
(?, 64)
layer/2/X
(?, 4)
enode/global/X/
(?, 4)

In [19]:
[W_i.get_shape().as_list() for W_i in Ws]


Out[19]:
[[12288, 1024], [1024, 64], [64, 4]]

In [20]:
Z.get_shape().as_list()


Out[20]:
[None, 4]

Decoder


In [21]:
# We'll first reverse the order of our weight matrices
decoder_Ws = Ws[::-1]

# then reverse the order of our dimensions
# appending the last layers number of inputs.
decoder_dimensions = encoder_dimensions[::-1][1:] + [nb_features]
print(decoder_dimensions)

assert(decoder_dimensions[-1] == nb_features)


[64, 1024, 12288]

In [22]:
def decode(z, dimensions, Ws, activation=tf.nn.tanh):
    current_input = z
    for layer_i, n_output in enumerate(dimensions):
        # we'll use a variable scope again to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.
        with tf.variable_scope("decoder/layer/{}".format(layer_i)):

            # Now we'll grab the weight matrix we created before and transpose it
            # So a 3072 x 784 matrix would become 784 x 3072
            # or a 256 x 64 matrix, would become 64 x 256
            W = tf.transpose(Ws[layer_i])

            # Now we'll multiply our input by our transposed W matrix
            h = tf.matmul(current_input, W)

            # And then use a relu activation function on its output
            current_input = activation(h)

            # We'll also replace n_input with the current n_output, so that on the
            # next iteration, our new number inputs will be correct.
            n_input = n_output
    Y = current_input
    return Y

In [23]:
Y = decode(Z, decoder_dimensions, decoder_Ws)

In [24]:
Y.get_shape().as_list()


Out[24]:
[None, 12288]

Optimizer


In [25]:
# Calculate some measure of loss, e.g. the pixel to pixel absolute difference or squared difference
# loss = tf.reduce_mean(tf.squared_difference(X, Y), 1)
loss = tf.squared_difference(X, Y)
# Now sum over every pixel and then calculate the mean over the batch dimension (just like session 2!)
# hint, use tf.reduce_mean and tf.reduce_sum
cost = tf.reduce_sum(loss)

In [26]:
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate==learning_rate).minimize(cost)

In [27]:
# (TODO) Create a tensorflow session and initialize all of our weights:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

All aboard the Tensorboard


In [28]:
from libs import tboard
tboard.show_graph(tf.get_default_graph().as_graph_def())


Training


In [30]:
# Some parameters for training
batch_size = 100
nb_epochs = 401
step = 10

# We'll try to reconstruct the same first 100 images and show how
# The network does over the course of training.
examples = ds.X[:100]

# We have to preprocess the images before feeding them to the network.
# I'll do this once here, so we don't have to do it every iteration.
test_examples = preprocess(examples, ds).reshape(-1, nb_features)

# If we want to just visualize them, we can create a montage.
test_images = utils.montage(examples).astype(np.uint8)

# Store images so we can make a gif
gifs = []

# Now for our training:
for epoch_i in range(nb_epochs):
    
    # Keep track of the cost
    this_cost = 0
    
    # Iterate over the entire dataset in batches
    for batch_X, _ in ds.train.next_batch(batch_size=batch_size):
        
        # (TODO) Preprocess and reshape our current batch, batch_X:
        this_batch = preprocess(batch_X, ds).reshape(-1, nb_features)
        
        # Compute the cost, and run the optimizer.
        this_cost += sess.run([cost, optimizer], feed_dict={X: this_batch})[0]
    
    # Average cost of this epoch
    avg_cost = this_cost / ds.X.shape[0] / batch_size
    print(epoch_i, avg_cost)
    
    # Let's also try to see how the network currently reconstructs the input.
    # We'll draw the reconstruction every `step` iterations.
    if epoch_i % step == 0:
        
        # (TODO) Ask for the output of the network, Y, and give it our test examples
        recon = sess.run(Y, feed_dict={X: batch_X.reshape(-1, nb_features)})
                         
        # Resize the 2d to the 4d representation:
        rsz = recon.reshape(examples.shape)

        # We have to unprocess the image now, removing the normalization
        unnorm_img = deprocess(rsz, ds)
                         
        # Clip to avoid saturation
        # TODO: Make sure this image is the correct range, e.g.
        # for float32 0-1, you should clip between 0 and 1
        # for uint8 0-255, you should clip between 0 and 255!
        clipped = np.clip(unnorm_img, 0, 255)

        # And we can create a montage of the reconstruction
        recon = utils.montage(clipped).astype(np.uint8)
        
        # Store for gif
        gifs.append(recon)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))
        axs[0].imshow(test_images)
        axs[0].set_title('Original')
        axs[1].imshow(recon)
        axs[1].set_title('Synthesis')
        fig.canvas.draw()
        plt.show()


0 166.6151875
1 166.6151375
2 166.6151875
3 166.615125
4 166.6151375
5 166.6152125
6 166.6151625
7 166.6151375
8 166.6151125
9 166.6151625
10 166.61515