My Session 3


In [1]:
# First check the Python version
import sys
if sys.version_info < (3,4):
    print('You are running an older version of Python!\n\n' \
          'You should consider updating to Python 3.4.0 or ' \
          'higher as the libraries built for this course ' \
          'have only been tested in Python 3.4 and higher.\n')
    print('Try installing the Python 3.5 version of anaconda '
          'and then restart `jupyter notebook`:\n' \
          'https://www.continuum.io/downloads\n\n')

# Now get necessary libraries
try:
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from skimage.transform import resize
    from skimage import data
    from scipy.misc import imresize
    import IPython.display as ipyd
except ImportError:
    print('You are missing some packages! ' \
          'We will try installing them before continuing!')
    !pip install "numpy>=1.11.0" "matplotlib>=1.5.1" "scikit-image>=0.11.3" "scikit-learn>=0.17" "scipy>=0.17.0"
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from skimage.transform import resize
    from skimage import data
    from scipy.misc import imresize
    import IPython.display as ipyd
    print('Done!')

# Import Tensorflow
try:
    import tensorflow as tf
except ImportError:
    print("You do not have tensorflow installed!")
    print("Follow the instructions on the following link")
    print("to install tensorflow before continuing:")
    print("")
    print("https://github.com/pkmital/CADL#installation-preliminaries")

# This cell includes the provided libraries from the zip file
# and a library for displaying images from ipython, which
# we will use to display the gif
try:
    from libs import utils, gif, datasets, dataset_utils, vae, dft, tboard
except ImportError:
    print("Make sure you have started notebook in the same directory" +
          " as the provided zip file which includes the 'libs' folder" +
          " and the file 'utils.py' inside of it.  You will NOT be able"
          " to complete this assignment unless you restart jupyter"
          " notebook inside the directory created by extracting"
          " the zip file or cloning the github repo.")

# We'll tell matplotlib to inline any drawn figures like so:
%matplotlib inline
plt.style.use('ggplot')

Part One - Autoencoders


In [2]:
def crop_edge(img, cropped_rate):
    """Crop arbitrary amount of pixel.
    """
    row_i = int(img.shape[0] * cropped_rate) // 2
    col_i = int(img.shape[1] * cropped_rate) // 2
    return img[row_i:-row_i, col_i:-col_i]

In [3]:
# See how this works w/ Celeb Images or try your own dataset instead:
dirname = '../data/pokemon/jpeg/'

# Load every image file in the provided directory
filenames = [os.path.join(dirname, fname)
             for fname in os.listdir(dirname)]
# imgs = [plt.imread(fname)[..., :3] for fname in filenames]

imgs = [imresize(crop_edge(plt.imread(f), 0.4), (64, 64)) for f in filenames]
imgs = np.array(imgs)

# Shuffle and limit the number of files to process
nb_clip = 100
np.random.shuffle(imgs)
imgs = imgs[:nb_clip]

# Then convert the list of images to a 4d array (e.g. use np.array to convert a list to a 4d array):
Xs = imgs.reshape(-1, 64, 64, 3 )

print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)


(100, 64, 64, 3)

In [4]:
ds = datasets.Dataset(Xs)

Normalization


In [5]:
mean_img = ds.mean().astype(np.uint8)
plt.imshow(mean_img)
# If your image comes out entirely black, try w/o the `astype(np.uint8)`
# that means your images are read in as 0-255, rather than 0-1 and 
# this simply depends on the version of matplotlib you are using.


Out[5]:
<matplotlib.image.AxesImage at 0x7ff784b63da0>

In [6]:
std_img = ds.std()
plt.imshow(std_img)
print(std_img.shape)


(64, 64, 3)

In [7]:
std_img = np.mean(std_img, axis=2)
plt.imshow(std_img)


Out[7]:
<matplotlib.image.AxesImage at 0x7ff73d6beef0>

In [8]:
plt.imshow(ds.X[0])
print(ds.X.shape)


(100, 64, 64, 3)

In [9]:
plt.imshow(ds.X[0].mean(axis=2))


Out[9]:
<matplotlib.image.AxesImage at 0x7ff73d5a6240>

In [10]:
plt.imshow(ds.X[0].std(axis=2))


Out[10]:
<matplotlib.image.AxesImage at 0x7ff73d58fbe0>

Preprocess and deprocess


In [11]:
# Write a function to preprocess/normalize an image, given its dataset object
# (which stores the mean and standard deviation!)
def preprocess(img, ds):
    norm_img = (img - ds.mean()) / ds.std()
    return norm_img

# Write a function to undo the normalization of an image, given its dataset object
# (which stores the mean and standard deviation!)
def deprocess(norm_img, ds):
    img = norm_img * ds.std() + ds.mean()
    return img

In [12]:
nb_features = len(ds.X[0].flatten())
print(nb_features)


12288

In [13]:
64*64*3


Out[13]:
12288

In [14]:
encoder_dimensions = [1024, 64, 4]

Graph time


In [15]:
tf.reset_default_graph()

In [16]:
X = tf.placeholder(np.float32, [None, nb_features])
                   
assert(X.get_shape().as_list() == [None, nb_features])

Encoder


In [17]:
def encode(X, dimensions, activation=tf.nn.tanh):
    # We're going to keep every matrix we create so let's create a list to hold them all
    Ws = []
    # We'll create a for loop to create each layer:
    for layer_i, nb_output in enumerate(dimensions):

        # TODO: just like in the last session,
        # we'll use a variable scope to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.  Make sure it is a unique name
        # for each layer, e.g., 'encoder/layer1', 'encoder/layer2', or
        # 'encoder/1', 'encoder/2',... 
        with tf.variable_scope('encode/layer{}'.format(layer_i)):

            # TODO: Create a weight matrix which will increasingly reduce
            # down the amount of information in the input by performing
            # a matrix multiplication.  You can use the utils.linear function.
            h, W = utils.linear(X, nb_output, activation=activation)
            
            # TODO: Apply an activation function (unless you used the parameter
            # for activation function in the utils.linear call)

            # Finally we'll store the weight matrix.
            # We need to keep track of all
            # the weight matrices we've used in our encoder
            # so that we can build the decoder using the
            # same weight matrices.
            Ws.append(W)
            
            # Replace X with the current layer's output, so we can
            # use it in the next layer.
            X = h
            print("layer/{}/X".format(layer_i))
            print(X.get_shape())

    z = X
    print("enode/global/X/")
    print(X.get_shape())
    return Ws, z

In [18]:
# Then call the function
Ws, Z = encode(X, encoder_dimensions)

# And just some checks to make sure you've done it right.
# assert(Z.get_shape().as_list() == [None, 2])
# assert(len(Ws) == len(encoder_dimensions))


layer/0/X
(?, 1024)
layer/1/X
(?, 64)
layer/2/X
(?, 4)
enode/global/X/
(?, 4)

In [19]:
[W_i.get_shape().as_list() for W_i in Ws]


Out[19]:
[[12288, 1024], [1024, 64], [64, 4]]

In [20]:
Z.get_shape().as_list()


Out[20]:
[None, 4]

Decoder


In [21]:
# We'll first reverse the order of our weight matrices
decoder_Ws = Ws[::-1]

# then reverse the order of our dimensions
# appending the last layers number of inputs.
decoder_dimensions = encoder_dimensions[::-1][1:] + [nb_features]
print(decoder_dimensions)

assert(decoder_dimensions[-1] == nb_features)


[64, 1024, 12288]

In [22]:
def decode(z, dimensions, Ws, activation=tf.nn.tanh):
    current_input = z
    for layer_i, n_output in enumerate(dimensions):
        # we'll use a variable scope again to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.
        with tf.variable_scope("decoder/layer/{}".format(layer_i)):

            # Now we'll grab the weight matrix we created before and transpose it
            # So a 3072 x 784 matrix would become 784 x 3072
            # or a 256 x 64 matrix, would become 64 x 256
            W = tf.transpose(Ws[layer_i])

            # Now we'll multiply our input by our transposed W matrix
            h = tf.matmul(current_input, W)

            # And then use a relu activation function on its output
            current_input = activation(h)

            # We'll also replace n_input with the current n_output, so that on the
            # next iteration, our new number inputs will be correct.
            n_input = n_output
    Y = current_input
    return Y

In [23]:
Y = decode(Z, decoder_dimensions, decoder_Ws)

In [24]:
Y.get_shape().as_list()


Out[24]:
[None, 12288]

Optimizer


In [25]:
# Calculate some measure of loss, e.g. the pixel to pixel absolute difference or squared difference
# loss = tf.reduce_mean(tf.squared_difference(X, Y), 1)
loss = tf.squared_difference(X, Y)
# Now sum over every pixel and then calculate the mean over the batch dimension (just like session 2!)
# hint, use tf.reduce_mean and tf.reduce_sum
cost = tf.reduce_sum(loss)

In [26]:
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate==learning_rate).minimize(cost)

In [27]:
# (TODO) Create a tensorflow session and initialize all of our weights:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

All aboard the Tensorboard


In [28]:
from libs import tboard
tboard.show_graph(tf.get_default_graph().as_graph_def())


Training


In [30]:
# Some parameters for training
batch_size = 100
nb_epochs = 401
step = 10

# We'll try to reconstruct the same first 100 images and show how
# The network does over the course of training.
examples = ds.X[:100]

# We have to preprocess the images before feeding them to the network.
# I'll do this once here, so we don't have to do it every iteration.
test_examples = preprocess(examples, ds).reshape(-1, nb_features)

# If we want to just visualize them, we can create a montage.
test_images = utils.montage(examples).astype(np.uint8)

# Store images so we can make a gif
gifs = []

# Now for our training:
for epoch_i in range(nb_epochs):
    
    # Keep track of the cost
    this_cost = 0
    
    # Iterate over the entire dataset in batches
    for batch_X, _ in ds.train.next_batch(batch_size=batch_size):
        
        # (TODO) Preprocess and reshape our current batch, batch_X:
        this_batch = preprocess(batch_X, ds).reshape(-1, nb_features)
        
        # Compute the cost, and run the optimizer.
        this_cost += sess.run([cost, optimizer], feed_dict={X: this_batch})[0]
    
    # Average cost of this epoch
    avg_cost = this_cost / ds.X.shape[0] / batch_size
    print(epoch_i, avg_cost)
    
    # Let's also try to see how the network currently reconstructs the input.
    # We'll draw the reconstruction every `step` iterations.
    if epoch_i % step == 0:
        
        # (TODO) Ask for the output of the network, Y, and give it our test examples
        recon = sess.run(Y, feed_dict={X: batch_X.reshape(-1, nb_features)})
                         
        # Resize the 2d to the 4d representation:
        rsz = recon.reshape(examples.shape)

        # We have to unprocess the image now, removing the normalization
        unnorm_img = deprocess(rsz, ds)
                         
        # Clip to avoid saturation
        # TODO: Make sure this image is the correct range, e.g.
        # for float32 0-1, you should clip between 0 and 1
        # for uint8 0-255, you should clip between 0 and 255!
        clipped = np.clip(unnorm_img, 0, 255)

        # And we can create a montage of the reconstruction
        recon = utils.montage(clipped).astype(np.uint8)
        
        # Store for gif
        gifs.append(recon)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))
        axs[0].imshow(test_images)
        axs[0].set_title('Original')
        axs[1].imshow(recon)
        axs[1].set_title('Synthesis')
        fig.canvas.draw()
        plt.show()


0 166.6151875
1 166.6151375
2 166.6151875
3 166.615125
4 166.6151375
5 166.6152125
6 166.6151625
7 166.6151375
8 166.6151125
9 166.6151625
10 166.61515
11 166.6152125
12 166.615025
13 166.615075
14 166.615225
15 166.61515
16 166.615125
17 166.615175
18 166.61515
19 166.6151625
20 166.6151375
21 166.6152
22 166.615125
23 166.6151
24 166.6151375
25 166.6151375
26 166.61525
27 166.6151875
28 166.6151125
29 166.615175
30 166.6151375
31 166.6151875
32 166.61515
33 166.6151125
34 166.6151875
35 166.6151625
36 166.6151875
37 166.6151875
38 166.6150875
39 166.61515
40 166.615175
41 166.6151625
42 166.6151
43 166.615175
44 166.6152125
45 166.6152125
46 166.6151625
47 166.615125
48 166.6151375
49 166.6152875
50 166.6151625
51 166.61515
52 166.6151875
53 166.6151
54 166.6151375
55 166.6151
56 166.615175
57 166.6151875
58 166.6151
59 166.6152
60 166.615125
61 166.615175
62 166.6151625
63 166.6151875
64 166.615175
65 166.6152625
66 166.6151375
67 166.615175
68 166.6151375
69 166.6151
70 166.6152
71 166.6151125
72 166.615225
73 166.61515
74 166.615175
75 166.615125
76 166.6151625
77 166.6150875
78 166.6151875
79 166.6152
80 166.615225
81 166.615175
82 166.61515
83 166.61515
84 166.6151375
85 166.6151625
86 166.6151
87 166.6151125
88 166.6151625
89 166.6151125
90 166.61515
91 166.615125
92 166.6151375
93 166.61515
94 166.6151625
95 166.6151625
96 166.6151125
97 166.6152
98 166.615125
99 166.6151625
100 166.6151
101 166.615175
102 166.615125
103 166.6151875
104 166.6152
105 166.6152
106 166.6150625
107 166.6150875
108 166.61515
109 166.6152
110 166.61515
111 166.615175
112 166.6151375
113 166.61515
114 166.615075
115 166.6151375
116 166.6151375
117 166.6151625
118 166.6150625
119 166.615125
120 166.615125
121 166.61515
122 166.615125
123 166.615225
124 166.6151125
125 166.6152
126 166.615175
127 166.615075
128 166.615125
129 166.6151125
130 166.6152