In [1]:
# First check the Python version
import sys
if sys.version_info < (3, 4):
    print('You are running an older version of Python!\n\n'
          'You should consider updating to Python 3.4.0 or '
          'higher as the libraries built for this course '
          'have only been tested in Python 3.4 and higher.\n')
    print('Try installing the Python 3.5 version of anaconda '
          'and then restart `jupyter notebook`:\n'
          'https://www.continuum.io/downloads\n\n')

# Now get necessary libraries
try:
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from skimage.transform import resize
    from skimage import data
    from scipy.misc import imresize
    import IPython.display as ipyd
except ImportError:
    print('You are missing some packages! '
          'We will try installing them before continuing!')
    !pip install "numpy>=1.11.0" "matplotlib>=1.5.1" "scikit-image>=0.11.3" "scikit-learn>=0.17" "scipy>=0.17.0"
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from skimage.transform import resize
    from skimage import data
    from scipy.misc import imresize
    import IPython.display as ipyd
    print('Done!')

# Import Tensorflow
try:
    import tensorflow as tf
except ImportError:
    print("You do not have tensorflow installed!")
    print("Follow the instructions on the following link")
    print("to install tensorflow before continuing:")
    print("")
    print("https://github.com/pkmital/CADL#installation-preliminaries")

# This cell includes the provided libraries from the zip file
# and a library for displaying images from ipython, which
# we will use to display the gif
try:
    from libs import utils, gif, datasets, dataset_utils, vae, dft, tboard
except ImportError:
    print("Make sure you have started the notebook in the same directory "
          "as the provided zip file, which includes the 'libs' folder "
          "and the file 'utils.py' inside of it. You will NOT be able "
          "to complete this assignment unless you restart jupyter "
          "notebook inside the directory created by extracting "
          "the zip file or cloning the github repo.")
# We'll tell matplotlib to inline any drawn figures like so:
%matplotlib inline
plt.style.use('ggplot')
In [2]:
def crop_edge(img, cropped_rate):
    """Crop a fixed fraction of pixels from the edges of an image.

    Assumes 0 < cropped_rate < 1; roughly cropped_rate/2 of the rows
    and columns are removed from each side.
    """
    row_i = int(img.shape[0] * cropped_rate) // 2
    col_i = int(img.shape[1] * cropped_rate) // 2
    return img[row_i:-row_i, col_i:-col_i]
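As a quick sanity check (not part of the original assignment), we can run `crop_edge` on one of skimage's bundled sample images; cropping at a rate of 0.4 should remove roughly 20% of the pixels from each side:

In [ ]:
# Hypothetical smoke test for crop_edge using a sample image from skimage.
sample = data.astronaut()                  # a 512 x 512 x 3 test image
cropped = crop_edge(sample, 0.4)
print(sample.shape, '->', cropped.shape)   # (512, 512, 3) -> (308, 308, 3)
plt.imshow(cropped)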
In [3]:
# See how this works w/ Celeb Images or try your own dataset instead
# (here we use a directory of Pokemon images):
dirname = '../data/pokemon/jpeg/'

# Load every image file in the provided directory
filenames = [os.path.join(dirname, fname)
             for fname in os.listdir(dirname)]

# Read each file, crop 40% of its edges, and resize it to 64 x 64:
# imgs = [plt.imread(fname)[..., :3] for fname in filenames]
imgs = [imresize(crop_edge(plt.imread(f), 0.4), (64, 64)) for f in filenames]
imgs = np.array(imgs)

# Shuffle and limit the number of images to process
nb_clip = 100
np.random.shuffle(imgs)
imgs = imgs[:nb_clip]

# Then make sure we have a 4d array, (n_images x height x width x channels):
Xs = imgs.reshape(-1, 64, 64, 3)
print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)
In [4]:
ds = datasets.Dataset(Xs)
In [5]:
mean_img = ds.mean().astype(np.uint8)
plt.imshow(mean_img)
# If the image comes out entirely black, try it w/o the `astype(np.uint8)`:
# that means your images were read in the range 0-1 rather than 0-255,
# which simply depends on the image format and the version of matplotlib
# you are using.
Out[5]:
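If in doubt about which range applies, a quick (hypothetical) check of the dataset's dtype and value range settles it:

In [ ]:
# uint8 images span 0-255; float images typically span 0-1.
print(ds.X.dtype, ds.X.min(), ds.X.max())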
In [6]:
std_img = ds.std()
plt.imshow(std_img)
print(std_img.shape)
In [7]:
std_img = np.mean(std_img, axis=2)
plt.imshow(std_img)
Out[7]:
In [8]:
plt.imshow(ds.X[0])
print(ds.X.shape)
In [9]:
plt.imshow(ds.X[0].mean(axis=2))
Out[9]:
In [10]:
plt.imshow(ds.X[0].std(axis=2))
Out[10]:
In [11]:
# Write a function to preprocess/normalize an image, given its dataset object
# (which stores the mean and standard deviation!)
def preprocess(img, ds):
    norm_img = (img - ds.mean()) / ds.std()
    return norm_img

# Write a function to undo the normalization of an image, given its dataset object
# (which stores the mean and standard deviation!)
def deprocess(norm_img, ds):
    img = norm_img * ds.std() + ds.mean()
    return img
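Since `deprocess` should exactly invert `preprocess`, a round-trip through both (a hypothetical sanity check) should reproduce the original image up to floating point error:

In [ ]:
# deprocess(preprocess(x)) should give back x, up to float rounding.
roundtrip = deprocess(preprocess(ds.X[0], ds), ds)
print(np.abs(roundtrip - ds.X[0]).max())  # should be ~0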
In [12]:
nb_features = len(ds.X[0].flatten())
print(nb_features)
In [13]:
64*64*3
Out[13]:
In [14]:
encoder_dimensions = [1024, 64, 4]
In [15]:
tf.reset_default_graph()
In [16]:
X = tf.placeholder(tf.float32, [None, nb_features])
assert(X.get_shape().as_list() == [None, nb_features])
In [17]:
def encode(X, dimensions, activation=tf.nn.tanh):
    # We're going to keep every matrix we create so let's create a list to hold them all
    Ws = []

    # We'll create a for loop to create each layer:
    for layer_i, nb_output in enumerate(dimensions):

        # TODO: just like in the last session,
        # we'll use a variable scope to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it. Make sure it is a unique name
        # for each layer, e.g., 'encoder/layer1', 'encoder/layer2', or
        # 'encoder/1', 'encoder/2',...
        with tf.variable_scope('encode/layer{}'.format(layer_i)):

            # TODO: Create a weight matrix which will increasingly reduce
            # down the amount of information in the input by performing
            # a matrix multiplication. You can use the utils.linear function.
            # (Passing `activation` here also applies the activation function,
            # so there is no separate activation step.)
            h, W = utils.linear(X, nb_output, activation=activation)

            # Finally we'll store the weight matrix.
            # We need to keep track of all
            # the weight matrices we've used in our encoder
            # so that we can build the decoder using the
            # same weight matrices.
            Ws.append(W)

            # Replace X with the current layer's output, so we can
            # use it in the next layer.
            X = h
            print("encode/layer{}/X".format(layer_i))
            print(X.get_shape())

    z = X
    print("encode/z")
    print(z.get_shape())
    return Ws, z
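For reference, a fully-connected layer like `utils.linear` can be sketched as below. This is only a guess at its behavior under a simple initializer, not the course library's actual code; check `libs/utils.py` for the real signature:

In [ ]:
# Minimal sketch of a linear layer returning (output, weight matrix),
# assuming utils.linear behaves roughly like this.
def linear_sketch(x, n_output, activation=None):
    n_input = x.get_shape().as_list()[1]
    W = tf.get_variable('W', shape=[n_input, n_output],
                        initializer=tf.random_normal_initializer(stddev=0.02))
    b = tf.get_variable('b', shape=[n_output],
                        initializer=tf.constant_initializer(0.0))
    h = tf.matmul(x, W) + b
    if activation is not None:
        h = activation(h)
    return h, W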
In [18]:
# Then call the function
Ws, Z = encode(X, encoder_dimensions)

# And just some checks to make sure you've done it right;
# Z's last dimension should match encoder_dimensions[-1] (here, 4):
# assert(Z.get_shape().as_list() == [None, 4])
# assert(len(Ws) == len(encoder_dimensions))
In [19]:
[W_i.get_shape().as_list() for W_i in Ws]
Out[19]:
In [20]:
Z.get_shape().as_list()
Out[20]:
In [21]:
# We'll first reverse the order of our weight matrices
decoder_Ws = Ws[::-1]

# then reverse the order of our dimensions,
# appending the number of input features as the final output size:
decoder_dimensions = encoder_dimensions[::-1][1:] + [nb_features]
print(decoder_dimensions)
assert(decoder_dimensions[-1] == nb_features)
In [22]:
def decode(z, dimensions, Ws, activation=tf.nn.tanh):
    current_input = z
    for layer_i, n_output in enumerate(dimensions):
        # we'll use a variable scope again to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.
        with tf.variable_scope("decoder/layer/{}".format(layer_i)):

            # Now we'll grab the weight matrix we created before and transpose it
            # So a 3072 x 784 matrix would become 784 x 3072
            # or a 256 x 64 matrix would become 64 x 256
            W = tf.transpose(Ws[layer_i])

            # Now we'll multiply our input by our transposed W matrix
            h = tf.matmul(current_input, W)

            # And then apply the activation function (tanh by default) to its output
            current_input = activation(h)
    Y = current_input
    return Y
In [23]:
Y = decode(Z, decoder_dimensions, decoder_Ws)
In [24]:
Y.get_shape().as_list()
Out[24]:
In [25]:
# Calculate some measure of loss, e.g. the pixel to pixel absolute difference or squared difference
loss = tf.squared_difference(X, Y)

# Now sum over every pixel and then calculate the mean over the batch dimension (just like session 2!)
# hint, use tf.reduce_mean and tf.reduce_sum
cost = tf.reduce_mean(tf.reduce_sum(loss, axis=1))
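To see why the reduction order matters, here is the same sum-then-mean computation in plain numpy on a toy batch (illustrative only):

In [ ]:
# Toy batch: 2 "images" of 3 "pixels" each, reconstructed as all zeros.
a = np.array([[0., 0., 0.],
              [1., 1., 1.]])
b = np.zeros((2, 3))
per_image = np.sum((a - b) ** 2, axis=1)  # sum over pixels -> [0., 3.]
print(per_image.mean())                   # mean over batch -> 1.5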
In [26]:
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
In [27]:
# (TODO) Create a tensorflow session and initialize all of our weights:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
In [28]:
from libs import tboard
tboard.show_graph(tf.get_default_graph().as_graph_def())
In [30]:
# Some parameters for training
batch_size = 100
nb_epochs = 401
step = 10

# We'll try to reconstruct the same first 100 images and show how
# the network does over the course of training.
examples = ds.X[:100]

# We have to preprocess the images before feeding them to the network.
# I'll do this once here, so we don't have to do it every iteration.
test_examples = preprocess(examples, ds).reshape(-1, nb_features)

# If we want to just visualize them, we can create a montage.
test_images = utils.montage(examples).astype(np.uint8)

# Store images so we can make a gif
gifs = []

# Now for our training:
for epoch_i in range(nb_epochs):

    # Keep track of the cost
    this_cost = 0

    # Iterate over the entire dataset in batches
    for batch_X, _ in ds.train.next_batch(batch_size=batch_size):

        # (TODO) Preprocess and reshape our current batch, batch_X:
        this_batch = preprocess(batch_X, ds).reshape(-1, nb_features)

        # Compute the cost, and run the optimizer.
        this_cost += sess.run([cost, optimizer], feed_dict={X: this_batch})[0]

    # Average cost of this epoch: the cost is already averaged over each
    # batch, so we just divide by the number of batches.
    avg_cost = this_cost / (ds.X.shape[0] // batch_size)
    print(epoch_i, avg_cost)

    # Let's also try to see how the network currently reconstructs the input.
    # We'll draw the reconstruction every `step` iterations.
    if epoch_i % step == 0:

        # (TODO) Ask for the output of the network, Y, and give it our test
        # examples (already preprocessed and reshaped above):
        recon = sess.run(Y, feed_dict={X: test_examples})

        # Resize the 2d to the 4d representation:
        rsz = recon.reshape(examples.shape)

        # We have to unprocess the image now, removing the normalization
        unnorm_img = deprocess(rsz, ds)

        # Clip to avoid saturation
        # TODO: Make sure this image is the correct range, e.g.
        # for float32 0-1, you should clip between 0 and 1
        # for uint8 0-255, you should clip between 0 and 255!
        clipped = np.clip(unnorm_img, 0, 255)

        # And we can create a montage of the reconstruction
        recon = utils.montage(clipped).astype(np.uint8)

        # Store for gif
        gifs.append(recon)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))
        axs[0].imshow(test_images)
        axs[0].set_title('Original')
        axs[1].imshow(recon)
        axs[1].set_title('Synthesis')
        fig.canvas.draw()
        plt.show()
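Finally, we can assemble the stored reconstruction montages into a gif and display it inline. This assumes `gif.build_gif` from the course's libs folder accepts a `saveto` argument as in earlier sessions; if your version differs, `imageio.mimsave('ae.gif', gifs)` works as well:

In [ ]:
# Write the collected montages to an animated gif and show it.
# (gif.build_gif is assumed to behave as in earlier course sessions.)
gif.build_gif(gifs, saveto='ae.gif')
# The random query string busts the browser cache so the fresh gif is shown.
ipyd.Image(url='ae.gif?{}'.format(np.random.rand()), height=500, width=500)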