# Environment bootstrap for the notebook: check the interpreter version, import
# the scientific stack (installing it with pip if the import fails), then
# import TensorFlow and the course's local `libs` package.
# NOTE(review): as it stands this section is not valid Python -- block bodies
# are not indented, each `except ImportError:` below has no visible `try:`,
# and it uses IPython-only syntax (`!pip ...`, `%matplotlib ...`).
# First check the Python version
import sys
if sys.version_info < (3,4):
print('You are running an older version of Python!\n\n' \
'You should consider updating to Python 3.4.0 or ' \
'higher as the libraries built for this course ' \
'have only been tested in Python 3.4 and higher.\n')
# NOTE(review): the print( call below is never closed in this section; its
# final string fragment and the closing `)` appear to be missing -- restore.
print('Try installing the Python 3.5 version of anaconda '
'and then restart `jupyter notebook`:\n' \
# Now get necessary libraries
# NOTE(review): presumably a `try:` opened here, paired with the
# `except ImportError:` further down -- confirm and restore.
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
# NOTE(review): scipy.misc.imresize was deprecated in SciPy 1.0 and removed in
# 1.3, while the pip line below only sets a lower bound on scipy -- confirm
# the installed version still provides it.
from scipy.misc import imresize
import IPython.display as ipyd
except ImportError:
print('You are missing some packages! ' \
'We will try installing them before continuing!')
# `!` is an IPython shell escape: pip is run from the notebook, then the same
# imports are attempted a second time.
!pip install "numpy>=1.11.0" "matplotlib>=1.5.1" "scikit-image>=0.11.3" "scikit-learn>=0.17" "scipy>=0.17.0"
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
from scipy.misc import imresize
import IPython.display as ipyd
# Import Tensorflow
# NOTE(review): the `try:` for the handler below is not visible either.
import tensorflow as tf
except ImportError:
print("You do not have tensorflow installed!")
print("Follow the instructions on the following link")
print("to install tensorflow before continuing:")
# NOTE(review): the message above ends with a colon but no link is printed in
# this section -- the line carrying it appears to be missing.
# This cell includes the provided libraries from the zip file
# and a library for displaying images from ipython, which
# we will use to display the gif
# NOTE(review): again, no `try:` is visible for the `except` that follows.
from libs import utils, gif, datasets, dataset_utils, vae, dft, tboard
except ImportError:
print("Make sure you have started notebook in the same directory" +
" as the provided zip file which includes the 'libs' folder" +
" and the file '' inside of it. You will NOT be able"
" to complete this assignment unless you restart jupyter"
" notebook inside the directory created by extracting"
" the zip file or cloning the github repo.")
# NOTE(review): the message above names the file as '' -- the file name looks
# like it was lost; confirm what it should say.
# We'll tell matplotlib to inline any drawn figures like so:
# NOTE(review): the next line looks like two statements fused together: the
# `%matplotlib inline` magic and the tail of a call ending in `'ggplot')`
# (presumably a matplotlib style selection) -- confirm and split.
%matplotlib inline'ggplot')
def crop_edge(img, cropped_rate):
    """Crop an equal margin off every side of an image.

    The total amount removed along each of the first two axes is
    ``int(size * cropped_rate)``; half of it (rounded down) is taken from
    each side, so the result stays centred.

    Parameters
    ----------
    img : array indexable on at least two axes (rows, columns, ...).
    cropped_rate : fraction of each of the first two axes to remove in total.

    Returns
    -------
    The centred sub-array of ``img``. When the computed margin is 0 the
    image is returned uncropped.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    row_i = int(n_rows * cropped_rate) // 2
    col_i = int(n_cols * cropped_rate) // 2
    # Use explicit end indices: a negative stop of `-0` is just 0, which
    # would turn a zero-width margin into an empty slice.
    return img[row_i:n_rows - row_i, col_i:n_cols - col_i]
# Directory holding the training images (swap in your own dataset here).
dirname = '../data/pokemon/jpeg/'
# Upper bound on how many images are used.
nb_clip = 100
# Every entry of the directory, in whatever order os.listdir returns them
# (no shuffling is performed).
filenames = [os.path.join(dirname, fname)
             for fname in os.listdir(dirname)]
# Read only the files that are kept: crop 40% off the borders, then resize
# to 64x64. Slicing the file list first avoids decoding and resizing images
# that would be thrown away immediately afterwards.
# imgs = [plt.imread(fname)[..., :3] for fname in filenames]
imgs = np.array([
    imresize(crop_edge(plt.imread(f), 0.4), (64, 64))
    for f in filenames[:nb_clip]
])
# Make the 4d layout explicit: (image, row, column, channel).
Xs = imgs.reshape(-1, 64, 64, 3)
assert Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100
# Wrap the array in the course's Dataset helper, which is queried for the
# mean and standard deviation below.
ds = datasets.Dataset(Xs)
mean_img = ds.mean().astype(np.uint8)
# If your image comes out entirely black, try w/o the `astype(np.uint8)`
# that means your images are read in as 0-255, rather than 0-1 and
# this simply depends on the version of matplotlib you are using.
std_img = ds.std()
# Collapse axis 2 (presumably the colour channels -- confirm against
# Dataset.std) so the deviation can be viewed as a single-channel image.
std_img = np.mean(std_img, axis=2)
# Write a function to preprocess/normalize an image, given its dataset object
# (which stores the mean and standard deviation!)
def preprocess(img, ds):
    """Normalize an image (or batch) with the dataset's statistics.

    Parameters
    ----------
    img : array broadcastable against ``ds.mean()`` / ``ds.std()``.
    ds : object exposing ``mean()`` and ``std()``.

    Returns
    -------
    ``(img - ds.mean()) / ds.std()``, except that entries whose standard
    deviation is exactly 0 are divided by 1 instead, so constant pixels
    produce finite values rather than NaN/inf.
    """
    std = ds.std()
    # A pixel that never varies across the dataset has zero deviation;
    # dividing by it would poison the network input with NaN/inf.
    safe_std = np.where(std == 0, 1, std)
    norm_img = (img - ds.mean()) / safe_std
    return norm_img
# Write a function to undo the normalization of an image, given its dataset object
# (which stores the mean and standard deviation!)
def deprocess(norm_img, ds):
    """Map a normalized image back to the dataset's original value range.

    Multiplies by ``ds.std()`` and adds ``ds.mean()``, i.e. the inverse of
    subtracting the mean and dividing by the standard deviation.
    """
    scale = ds.std()
    offset = ds.mean()
    return norm_img * scale + offset
# Number of values in one flattened image = width of the network input.
nb_features = ds.X[0].size
# Output width of each successive encoder layer.
encoder_dimensions = [1024, 64, 4]
# Input placeholder: any number of rows, one flattened image per row.
X = tf.placeholder(dtype=np.float32, shape=[None, nb_features])
assert X.get_shape().as_list() == [None, nb_features]
def encode(X, dimensions, activation=tf.nn.tanh):
    """Stack one `utils.linear` layer per entry of `dimensions` on top of X.

    Parameters
    ----------
    X : input tensor fed to the first layer.
    dimensions : iterable of int, the output size of each layer in order.
    activation : callable forwarded to `utils.linear` for every layer.

    Returns
    -------
    Ws : list with the weight matrix returned by `utils.linear` for each
        layer, in layer order, so a decoder can reuse them.
    z : output of the last layer (X itself when `dimensions` is empty).
    """
    # Keep every weight matrix we create so the decoder can be built from
    # the same weights.
    Ws = []
    current = X
    for layer_i, nb_output in enumerate(dimensions):
        # The variable scope prefixes the layer's variables with a name
        # that is unique per layer.
        with tf.variable_scope('encode/layer{}'.format(layer_i)):
            # The activation is applied inside utils.linear.
            h, W = utils.linear(current, nb_output, activation=activation)
            # Store the weight matrix; without this the returned list
            # stays empty and nothing can be shared with the decoder.
            Ws.append(W)
            # This layer's output is the next layer's input.
            current = h
    z = current
    return Ws, z
# Build the encoder on top of the input placeholder.
Ws, Z = encode(X, encoder_dimensions)
# Sanity checks, left disabled:
# assert(Z.get_shape().as_list() == [None, 2])
# assert(len(Ws) == len(encoder_dimensions))
# Notebook display of the shape of every encoder weight matrix.
[w.get_shape().as_list() for w in Ws]
# The decoder walks the encoder's weights back to front...
decoder_Ws = list(reversed(Ws))
# ...and its layer sizes are the encoder's sizes reversed, minus the
# innermost one, ending on the size of the original input.
decoder_dimensions = list(reversed(encoder_dimensions))[1:] + [nb_features]
assert decoder_dimensions[-1] == nb_features
def decode(z, dimensions, Ws, activation=tf.nn.tanh):
    """Build the decoder from transposed (shared) weight matrices.

    Parameters
    ----------
    z : tensor fed to the first decoder layer.
    dimensions : iterable with one entry per decoder layer; only the
        number of entries is used, the layer sizes come from `Ws`.
    Ws : sequence of weight matrices, already in decoder order; entry i is
        transposed and used by layer i.
    activation : callable applied to every layer's output.

    Returns
    -------
    Output tensor of the last decoder layer (z itself when `dimensions`
    is empty).
    """
    current_input = z
    for layer_i, _ in enumerate(dimensions):
        # The variable scope prefixes anything created for this layer.
        with tf.variable_scope("decoder/layer/{}".format(layer_i)):
            # Reuse the matching weight matrix, transposed so that e.g. a
            # 256 x 64 matrix becomes 64 x 256.
            W = tf.transpose(Ws[layer_i])
            # Multiply the input by the transposed weights...
            h = tf.matmul(current_input, W)
            # ...and apply the configured activation (tanh by default).
            current_input = activation(h)
    Y = current_input
    return Y
# Reconstruct the input from the encoder's innermost layer.
Y = decode(Z, decoder_dimensions, decoder_Ws)
# Pixel-to-pixel squared difference between input and reconstruction.
loss = tf.squared_difference(X, Y)
# Sum over every pixel of an example (axis 1), then average over the batch,
# so the cost does not scale with the batch size.
cost = tf.reduce_mean(tf.reduce_sum(loss, 1))
learning_rate = 0.001
# `learning_rate=` must be a keyword assignment: written with `==` it is a
# comparison, and the optimizer is handed the boolean True as its rate.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Create a tensorflow session and initialize all of our weights. The
# initializer has to run after the optimizer has been built, so that the
# variables the optimizer creates are initialized as well.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
from libs import tboard
# Some parameters for training
batch_size = 100
nb_epochs = 401
step = 10
# We'll try to reconstruct the same first 100 images and show how
# the network does over the course of training.
examples = ds.X[:100]
# Preprocess the images once here, so it isn't repeated every iteration;
# each image is flattened into one row of nb_features values.
test_examples = preprocess(examples, ds).reshape(-1, nb_features)
# Montage of the untouched examples, for visual comparison.
test_images = utils.montage(examples).astype(np.uint8)
# Store images so we can make a gif
gifs = []
# Now for our training:
for epoch_i in range(nb_epochs):
    # Keep track of the cost
    this_cost = 0
    # Iterate over the entire dataset in batches
    for batch_X, _ in ds.train.next_batch(batch_size=batch_size):
        # Normalize the batch and flatten each image into one row.
        this_batch = preprocess(batch_X, ds).reshape(-1, nb_features)
        # Run one optimizer step; element 0 of the result is the cost.
        this_cost += sess.run([cost, optimizer],
                              feed_dict={X: this_batch})[0]
    # Average cost of this epoch
    avg_cost = this_cost / ds.X.shape[0] / batch_size
    print(epoch_i, avg_cost)
    # Let's also try to see how the network currently reconstructs the input.
    # We'll draw the reconstruction every `step` iterations.
    if epoch_i % step == 0:
        # Ask for the network output on the fixed, already preprocessed
        # test examples. Feeding the raw last training batch instead would
        # bypass the normalization and break the reshape below whenever
        # that batch holds fewer images than `examples`.
        recon = sess.run(Y, feed_dict={X: test_examples})
        # Resize the 2d to the 4d representation:
        rsz = recon.reshape(examples.shape)
        # We have to unprocess the image now, removing the normalization
        unnorm_img = deprocess(rsz, ds)
        # Clip to avoid saturation; the result is cast to uint8 below, so
        # the valid range is 0-255.
        clipped = np.clip(unnorm_img, 0, 255)
        # And we can create a montage of the reconstruction
        recon = utils.montage(clipped).astype(np.uint8)
        # Store for gif
        # NOTE(review): only the figure is created in this section; the
        # code that draws into it and stores it is not visible here.
        fig, axs = plt.subplots(1, 2, figsize=(10, 10))