In [1]:
# First check the Python version
import sys
if sys.version_info < (3,4):
print('You are running an older version of Python!\n\n' \
'You should consider updating to Python 3.4.0 or ' \
'higher as the libraries built for this course ' \
'have only been tested in Python 3.4 and higher.\n')
print('Try installing the Python 3.5 version of anaconda '
'and then restart `jupyter notebook`:\n' \
'https://www.continuum.io/downloads\n\n')
# Now get necessary libraries
try:
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
from scipy.misc import imresize
import IPython.display as ipyd
except ImportError:
print('You are missing some packages! ' \
'We will try installing them before continuing!')
!pip install "numpy>=1.11.0" "matplotlib>=1.5.1" "scikit-image>=0.11.3" "scikit-learn>=0.17" "scipy>=0.17.0"
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
from scipy.misc import imresize
import IPython.display as ipyd
print('Done!')
# Import Tensorflow
try:
import tensorflow as tf
except ImportError:
print("You do not have tensorflow installed!")
print("Follow the instructions on the following link")
print("to install tensorflow before continuing:")
print("")
print("https://github.com/pkmital/CADL#installation-preliminaries")
# This cell includes the provided libraries from the zip file
# and a library for displaying images from ipython, which
# we will use to display the gif
try:
from libs import utils, gif, datasets, dataset_utils, vae, dft, tboard
except ImportError:
print("Make sure you have started notebook in the same directory" +
" as the provided zip file which includes the 'libs' folder" +
" and the file 'utils.py' inside of it. You will NOT be able"
" to complete this assignment unless you restart jupyter"
" notebook inside the directory created by extracting"
" the zip file or cloning the github repo.")
# We'll tell matplotlib to inline any drawn figures like so:
%matplotlib inline
plt.style.use('ggplot')
In [2]:
def crop_edge(img, cropped_rate):
    """Crop a centred window out of an image, trimming opposite borders equally.

    Args:
        img: array indexed as (rows, cols, ...), e.g. an H x W x C image.
        cropped_rate: fraction of each dimension to remove in total; half of
            it is taken from each side (0.4 trims 20% off every border).

    Returns:
        A slice of `img` with the borders removed. When the computed margin
        for an axis is 0, that axis is returned uncropped.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    row_i = int(n_rows * cropped_rate) // 2
    col_i = int(n_cols * cropped_rate) // 2
    # Use explicit end indices: `img[row_i:-row_i]` collapses to an empty
    # array when the margin is 0, because `-0 == 0`.
    return img[row_i:n_rows - row_i, col_i:n_cols - col_i]
In [3]:
# See how this works w/ Celeb Images or try your own dataset instead:
dirname = '../data/pokemon/jpeg/'
# Collect the JPEG files in the directory. Anything else that os.listdir
# returns (hidden files, sub-directories, ...) is skipped so that
# plt.imread is only ever handed an image.
filenames = [os.path.join(dirname, fname)
             for fname in os.listdir(dirname)
             if fname.lower().endswith(('.jpg', '.jpeg'))]
# Trim the borders of every image (crop_edge with rate 0.4), then resize
# to 64 x 64.
imgs = [imresize(crop_edge(plt.imread(f), 0.4), (64, 64)) for f in filenames]
imgs = np.array(imgs)
# Shuffle and limit the number of files to process
nb_clip = 100
np.random.shuffle(imgs)
imgs = imgs[:nb_clip]
# Make sure the images form a 4d array of (image, row, column, channel):
Xs = imgs.reshape(-1, 64, 64, 3)
print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)
In [4]:
# Wrap the image array in the course-provided Dataset helper; later cells
# read its mean(), std(), X and train.next_batch().
ds = datasets.Dataset(Xs)
In [5]:
# Mean image of the dataset, cast to uint8 for display.
mean_img = ds.mean().astype(np.uint8)
plt.imshow(mean_img)
# If your image comes out entirely black, try w/o the `astype(np.uint8)`
# that means your images are read in as 0-255, rather than 0-1 and
# this simply depends on the version of matplotlib you are using.
Out[5]:
In [6]:
# Standard deviation image of the dataset, shown as returned (no uint8 cast).
std_img = ds.std()
plt.imshow(std_img)
print(std_img.shape)
In [7]:
# Average the deviation over axis 2 to get a single 2-D map.
# NOTE(review): std_img is overwritten with the reduced array, so re-running
# only this cell would try to reduce axis 2 of a 2-D array — assumes ds.std()
# is H x W x C; confirm.
std_img = np.mean(std_img, axis=2)
plt.imshow(std_img)
Out[7]:
In [8]:
# Show the first image held by the dataset and the shape of the full array.
plt.imshow(ds.X[0])
print(ds.X.shape)
In [9]:
# First image averaged over axis 2 (one value per pixel position).
plt.imshow(ds.X[0].mean(axis=2))
Out[9]:
In [10]:
# Standard deviation of the first image over axis 2 (one value per pixel position).
plt.imshow(ds.X[0].std(axis=2))
Out[10]:
In [11]:
# Write a function to preprocess/normalize an image, given its dataset object
# (which stores the mean and standard deviation!)
def preprocess(img, ds):
    """Normalize an image (or a batch of images) with the dataset statistics.

    Args:
        img: array broadcastable against `ds.mean()` and `ds.std()`.
        ds: dataset object exposing `mean()` and `std()`.

    Returns:
        `(img - ds.mean()) / ds.std()`. Positions whose standard deviation is
        exactly 0 are divided by 1 instead, so the result stays finite.
    """
    std = ds.std()
    # A pixel that is constant across the dataset has std == 0; dividing by it
    # would put NaN/inf into the network input and poison the training cost.
    safe_std = np.where(std == 0, 1.0, std)
    return (img - ds.mean()) / safe_std
# Write a function to undo the normalization of an image, given its dataset object
# (which stores the mean and standard deviation!)
def deprocess(norm_img, ds):
    """Undo the normalization: scale by the dataset std and add the mean back.

    Args:
        norm_img: normalized array, broadcastable against `ds.std()` / `ds.mean()`.
        ds: dataset object exposing `mean()` and `std()`.

    Returns:
        `norm_img * ds.std() + ds.mean()`.
    """
    scale = ds.std()
    offset = ds.mean()
    return norm_img * scale + offset
In [12]:
# Number of input features per image: the element count of one flattened image.
nb_features = len(ds.X[0].flatten())
print(nb_features)
In [13]:
# Sanity check: the feature count expected for a 64 x 64 x 3 image.
64*64*3
Out[13]:
In [14]:
# Output width of each encoder layer, in order; the last entry is the width
# of the innermost (latent) layer.
encoder_dimensions = [1024, 64, 4]
In [15]:
# Clear the default graph before building the network below.
tf.reset_default_graph()
In [16]:
# Network input: any number of rows, each holding nb_features values
# (one flattened image per row).
X = tf.placeholder(np.float32, [None, nb_features])
assert(X.get_shape().as_list() == [None, nb_features])
In [17]:
def encode(X, dimensions, activation=tf.nn.tanh):
    """Stack one `utils.linear` layer per entry of `dimensions` on top of X.

    Args:
        X: input tensor fed to the first layer.
        dimensions: output width requested for each successive layer.
        activation: forwarded to `utils.linear` for every layer.

    Returns:
        (Ws, z): the list of weight matrices returned by `utils.linear`, in
        layer order, and the output of the last layer (X itself when
        `dimensions` is empty).
    """
    weights = []
    hidden = X
    for idx, width in enumerate(dimensions):
        # One variable scope per layer so every layer's variables get a
        # distinct name prefix.
        with tf.variable_scope('encode/layer{}'.format(idx)):
            hidden, W = utils.linear(hidden, width, activation=activation)
        # Keep the weights: the decoder is built from these same matrices.
        weights.append(W)
        print("layer/{}/X".format(idx))
        print(hidden.get_shape())
    print("enode/global/X/")
    print(hidden.get_shape())
    return weights, hidden
In [18]:
# Then call the function
Ws, Z = encode(X, encoder_dimensions)
# And just some checks to make sure you've done it right.
# assert(Z.get_shape().as_list() == [None, 2])
# assert(len(Ws) == len(encoder_dimensions))
In [19]:
# Static shape of every encoder weight matrix.
[W_i.get_shape().as_list() for W_i in Ws]
Out[19]:
In [20]:
# Static shape of the latent tensor.
Z.get_shape().as_list()
Out[20]:
In [21]:
# We'll first reverse the order of our weight matrices
decoder_Ws = Ws[::-1]
# then reverse the order of our dimensions
# appending the last layers number of inputs.
# (The reversed list drops its first entry, the latent width, because that is
# the decoder's input rather than one of its outputs.)
decoder_dimensions = encoder_dimensions[::-1][1:] + [nb_features]
print(decoder_dimensions)
assert(decoder_dimensions[-1] == nb_features)
In [22]:
def decode(z, dimensions, Ws, activation=tf.nn.tanh):
    """Build the decoder from the transposed encoder weights (tied weights).

    Args:
        z: latent tensor to decode.
        dimensions: one entry per decoder layer. Only its length is used; the
            layer shapes come from the matrices in `Ws`.
        Ws: weight matrices in decoder order (the encoder's list, reversed).
        activation: applied to the output of every layer, including the last.

    Returns:
        The output tensor of the last decoder layer.
    """
    current_input = z
    for layer_i in range(len(dimensions)):
        # we'll use a variable scope again to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.
        with tf.variable_scope("decoder/layer/{}".format(layer_i)):
            # Grab the matching encoder matrix and transpose it, so an
            # (n_in x n_out) encoder matrix maps n_out back to n_in.
            W = tf.transpose(Ws[layer_i])
            # Multiply our input by the transposed W matrix
            h = tf.matmul(current_input, W)
            # and apply the requested activation (tanh by default).
            current_input = activation(h)
    return current_input
In [23]:
# Build the decoder on top of the latent tensor, using the reversed encoder weights.
Y = decode(Z, decoder_dimensions, decoder_Ws)
In [24]:
# Static shape of the reconstruction tensor.
Y.get_shape().as_list()
Out[24]:
In [25]:
# Calculate some measure of loss, e.g. the pixel to pixel absolute difference or squared difference
# loss = tf.reduce_mean(tf.squared_difference(X, Y), 1)
# Element-wise squared error between the input and its reconstruction.
loss = tf.squared_difference(X, Y)
# Now sum over every pixel and then calculate the mean over the batch dimension (just like session 2!)
# hint, use tf.reduce_mean and tf.reduce_sum
# NOTE(review): this sums over every element, batch dimension included,
# instead of taking the mean over the batch as the instruction above
# describes; the training loop then divides the accumulated cost itself.
cost = tf.reduce_sum(loss)
In [26]:
learning_rate = 0.001
# Pass the rate by keyword. `learning_rate==learning_rate` is a comparison,
# so the optimizer was handed `True` (a step size of 1.0) instead of 0.001.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
In [27]:
# Create a tensorflow session and initialize all of our weights:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
In [28]:
# Display the default graph's GraphDef with the course's tboard helper.
# NOTE(review): tboard is already imported by the first cell; this repeat
# import only matters if that cell's import failed.
from libs import tboard
tboard.show_graph(tf.get_default_graph().as_graph_def())
In [30]:
# Some parameters for training
batch_size = 100
nb_epochs = 401
step = 10

# We'll try to reconstruct the same first 100 images and show how
# The network does over the course of training.
examples = ds.X[:100]

# We have to preprocess the images before feeding them to the network.
# I'll do this once here, so we don't have to do it every iteration.
test_examples = preprocess(examples, ds).reshape(-1, nb_features)

# If we want to just visualize them, we can create a montage.
test_images = utils.montage(examples).astype(np.uint8)

# Store images so we can make a gif
gifs = []

# Now for our training:
for epoch_i in range(nb_epochs):

    # Keep track of the cost
    this_cost = 0

    # Iterate over the entire dataset in batches
    for batch_X, _ in ds.train.next_batch(batch_size=batch_size):

        # Preprocess and reshape our current batch, batch_X:
        this_batch = preprocess(batch_X, ds).reshape(-1, nb_features)

        # Compute the cost, and run the optimizer.
        this_cost += sess.run([cost, optimizer], feed_dict={X: this_batch})[0]

    # Average cost of this epoch
    avg_cost = this_cost / ds.X.shape[0] / batch_size
    print(epoch_i, avg_cost)

    # Let's also try to see how the network currently reconstructs the input.
    # We'll draw the reconstruction every `step` iterations.
    if epoch_i % step == 0:

        # Ask for the output of the network, Y, and give it our test examples.
        # Feeding the fixed, preprocessed `test_examples` (not the raw last
        # training batch) keeps the montage comparable with `test_images`
        # and guarantees it matches `examples.shape` below.
        recon = sess.run(Y, feed_dict={X: test_examples})

        # Resize the 2d to the 4d representation:
        rsz = recon.reshape(examples.shape)

        # We have to unprocess the image now, removing the normalization
        unnorm_img = deprocess(rsz, ds)

        # Clip to the 0-255 range before the uint8 cast below.
        clipped = np.clip(unnorm_img, 0, 255)

        # And we can create a montage of the reconstruction
        recon = utils.montage(clipped).astype(np.uint8)

        # Store for gif
        gifs.append(recon)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))
        axs[0].imshow(test_images)
        axs[0].set_title('Original')
        axs[1].imshow(recon)
        axs[1].set_title('Synthesis')
        fig.canvas.draw()
        plt.show()
In [31]:
# First check the Python version
import sys
if sys.version_info < (3,4):
print('You are running an older version of Python!\n\n' \
'You should consider updating to Python 3.4.0 or ' \
'higher as the libraries built for this course ' \
'have only been tested in Python 3.4 and higher.\n')
print('Try installing the Python 3.5 version of anaconda '
'and then restart `jupyter notebook`:\n' \
'https://www.continuum.io/downloads\n\n')
# Now get necessary libraries
try:
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
from scipy.misc import imresize
import IPython.display as ipyd
except ImportError:
print('You are missing some packages! ' \
'We will try installing them before continuing!')
!pip install "numpy>=1.11.0" "matplotlib>=1.5.1" "scikit-image>=0.11.3" "scikit-learn>=0.17" "scipy>=0.17.0"
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
from scipy.misc import imresize
import IPython.display as ipyd
print('Done!')
# Import Tensorflow
try:
import tensorflow as tf
except ImportError:
print("You do not have tensorflow installed!")
print("Follow the instructions on the following link")
print("to install tensorflow before continuing:")
print("")
print("https://github.com/pkmital/CADL#installation-preliminaries")
# This cell includes the provided libraries from the zip file
# and a library for displaying images from ipython, which
# we will use to display the gif
try:
from libs import utils, gif, datasets, dataset_utils, vae, dft
except ImportError:
print("Make sure you have started notebook in the same directory" +
" as the provided zip file which includes the 'libs' folder" +
" and the file 'utils.py' inside of it. You will NOT be able"
" to complete this assignment unless you restart jupyter"
" notebook inside the directory created by extracting"
" the zip file or cloning the github repo.")
# We'll tell matplotlib to inline any drawn figures like so:
%matplotlib inline
plt.style.use('ggplot')
In [32]:
def crop_edge(img, cropped_rate):
    """Crop a centred window out of an image, trimming opposite borders equally.

    Args:
        img: array indexed as (rows, cols, ...), e.g. an H x W x C image.
        cropped_rate: fraction of each dimension to remove in total; half of
            it is taken from each side (0.4 trims 20% off every border).

    Returns:
        A slice of `img` with the borders removed. When the computed margin
        for an axis is 0, that axis is returned uncropped.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    row_i = int(n_rows * cropped_rate) // 2
    col_i = int(n_cols * cropped_rate) // 2
    # Use explicit end indices: `img[row_i:-row_i]` collapses to an empty
    # array when the margin is 0, because `-0 == 0`.
    return img[row_i:n_rows - row_i, col_i:n_cols - col_i]
In [33]:
# See how this works w/ Celeb Images or try your own dataset instead:
dirname = '../data/pokemon/jpeg/'
# os.listdir returns entries in arbitrary order, so sort them: otherwise the
# "first nb_clip files" below would be a different subset from one machine
# (or run) to the next. Only real JPEG file names are kept.
filenames = sorted(os.path.join(dirname, file_i)
                   for file_i in os.listdir(dirname)
                   if file_i.lower().endswith(('.jpg', '.jpeg')))
nb_clip = 100
filenames = filenames[:nb_clip]
# Trim the borders of every image (crop_edge with rate 0.4), then resize
# to 100 x 100.
imgs = [imresize(crop_edge(plt.imread(f), 0.4), (100, 100)) for f in filenames]
# Then convert the list of images to a 4d array (e.g. use np.array to convert a list to a 4d array):
Xs = np.array(imgs)
print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 250 and Xs.shape[2] <= 250)
plt.figure(figsize=(10, 10))
plt.imshow(utils.montage(imgs).astype(np.uint8))
Out[33]:
In [34]:
# Wrap the image array in the course-provided Dataset helper; later cells
# read its mean(), std(), X and train.next_batch().
ds = datasets.Dataset(Xs)
In [35]:
# Mean image of the dataset, cast to uint8 for display, plus the shape of
# the uncast mean.
mean_img = ds.mean().astype(np.uint8)
plt.imshow(mean_img)
print(ds.mean().shape)
In [36]:
# Standard deviation image of the dataset, shown as returned (the uint8 cast
# is deliberately left disabled).
std_img = ds.std() #.astype(np.uint8)
plt.imshow(std_img)
print(std_img.shape)
In [37]:
# Average the deviation over axis 2 to get a single 2-D map.
# NOTE(review): std_img is overwritten with the reduced array, so re-running
# only this cell would try to reduce axis 2 of a 2-D array — assumes ds.std()
# is H x W x C; confirm.
std_img = np.mean(std_img, axis=2)
plt.imshow(std_img)
Out[37]:
In [38]:
# Show the first image, then print its shape and the shape of the whole array.
plt.imshow(ds.X[0])
print(ds.X[0].shape)
print(ds.X.shape)
In [39]:
# Print the shape of every batch the dataset yields at batch_size=10.
# The loop variable is deliberately not called `X`: that name is used for
# the network's input placeholder elsewhere in this notebook, and rebinding
# it here would leave a stale array behind for any later cell to pick up.
for batch_images, _ in ds.train.next_batch(batch_size=10):
    print(batch_images.shape)
In [40]:
# Write a function to preprocess/normalize an image, given its dataset object
# (which stores the mean and standard deviation!)
def preprocess(img, ds):
    """Normalize an image (or a batch of images) with the dataset statistics.

    Args:
        img: array broadcastable against `ds.mean()` and `ds.std()`.
        ds: dataset object exposing `mean()` and `std()`.

    Returns:
        `(img - ds.mean()) / ds.std()`. Positions whose standard deviation is
        exactly 0 are divided by 1 instead, so the result stays finite.
    """
    std = ds.std()
    # A pixel that is constant across the dataset has std == 0; dividing by it
    # would put NaN/inf into the network input and poison the training cost.
    safe_std = np.where(std == 0, 1.0, std)
    return (img - ds.mean()) / safe_std
# Write a function to undo the normalization of an image, given its dataset object
# (which stores the mean and standard deviation!)
def deprocess(norm_img, ds):
    """Undo the normalization: scale by the dataset std and add the mean back.

    Args:
        norm_img: normalized array, broadcastable against `ds.std()` / `ds.mean()`.
        ds: dataset object exposing `mean()` and `std()`.

    Returns:
        `norm_img * ds.std() + ds.mean()`.
    """
    scale = ds.std()
    offset = ds.mean()
    return norm_img * scale + offset
# Just to make sure that you've coded the previous two functions correctly:
# normalizing and then un-normalizing the first image must give it back
# (up to np.allclose tolerance). The round trip is also displayed.
assert(np.allclose(deprocess(preprocess(ds.X[0], ds), ds), ds.X[0]))
plt.imshow(deprocess(preprocess(ds.X[0], ds), ds).astype(np.uint8))
Out[40]:
In [41]:
# Number of features per image = height x width x channels, read off the
# first three axes of a single image.
height, width, channels = ds.X[0].shape[:3]
n_features = height * width * channels
print(n_features)
In [78]:
# Output width of each encoder layer, in order; the last entry is the width
# of the innermost (latent) layer. The commented lines are alternative
# configurations that were tried.
# NOTE(review): the latent-space cells further down index and feed a
# 2-column latent code, which matches the first two alternatives but not
# the active 4-unit setting — confirm which one is intended.
# encoder_dimensions = [1024, 256, 64, 2]
# encoder_dimensions = [1024, 64, 16, 2]
encoder_dimensions = [1024, 64, 4]
# encoder_dimensions = [1024, 512, 256, 128, 64, 32, 16, 8]
In [79]:
# Clear the default graph before building the network below.
tf.reset_default_graph()
In [80]:
# Network input: any number of rows, each holding n_features values
# (one flattened image per row).
X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
assert(X.get_shape().as_list() == [None, n_features])
In [81]:
def encode(X, dimensions, activation=tf.nn.tanh):
    """Stack one `utils.linear` layer per entry of `dimensions` on top of X.

    Args:
        X: input tensor fed to the first layer.
        dimensions: output width requested for each successive layer.
        activation: non-linearity forwarded to `utils.linear` for every layer.

    Returns:
        (Ws, z): the list of weight matrices returned by `utils.linear`, in
        layer order, and the output of the last layer (X itself when
        `dimensions` is empty).
    """
    # We're going to keep every matrix we create so let's create a list to hold them all
    Ws = []
    current_input = X

    # We'll create a for loop to create each layer:
    for layer_i, n_output in enumerate(dimensions):

        # This will simply prefix all the variables made in this scope
        # with the name we give it, using a unique name for each layer.
        with tf.variable_scope("encode/layer" + str(layer_i + 1)):

            # Create the layer. The activation is passed through explicitly:
            # leaving it out silently ignored this function's `activation`
            # argument and produced a purely linear encoder, while the
            # decoder applies its activation on every layer.
            h, W = utils.linear(current_input, n_output, activation=activation)

            # Keep track of the weight matrix so that the decoder can be
            # built from the same matrices.
            Ws.append(W)

            # The current layer's output is the next layer's input.
            current_input = h

    z = current_input
    return Ws, z
In [82]:
# Then call the function
Ws, z = encode(X, encoder_dimensions)
# And just some checks to make sure you've done it right.
# assert(z.get_shape().as_list() == [None, 2])
# assert(len(Ws) == len(encoder_dimensions))
In [83]:
# We'll first reverse the order of our weight matrices
# Reversed encoder weights: the decoder consumes them innermost layer first.
decoder_Ws = Ws[::-1]
# then reverse the order of our dimensions
# appending the last layers number of inputs.
# (The reversed list drops its first entry, the latent width, because that is
# the decoder's input rather than one of its outputs.)
decoder_dimensions = encoder_dimensions[::-1][1:] + [n_features]
print(decoder_dimensions)
assert(decoder_dimensions[-1] == n_features)
In [84]:
def decode(z, dimensions, Ws, activation=tf.nn.tanh):
    """Build the decoder from the transposed encoder weights (tied weights).

    Args:
        z: latent tensor to decode.
        dimensions: one entry per decoder layer. Only its length is used; the
            layer shapes come from the matrices in `Ws`.
        Ws: weight matrices in decoder order (the encoder's list, reversed).
        activation: applied to the output of every layer, including the last.

    Returns:
        The output tensor of the last decoder layer.
    """
    current_input = z
    for layer_i in range(len(dimensions)):
        # we'll use a variable scope again to help encapsulate our variables
        # This will simply prefix all the variables made in this scope
        # with the name we give it.
        with tf.variable_scope("decoder/layer/{}".format(layer_i)):
            # Grab the matching encoder matrix and transpose it, so an
            # (n_in x n_out) encoder matrix maps n_out back to n_in.
            W = tf.transpose(Ws[layer_i])
            # Multiply our input by the transposed W matrix
            h = tf.matmul(current_input, W)
            # and apply the requested activation (tanh by default).
            current_input = activation(h)
    return current_input
In [85]:
# Build the decoder on top of the latent tensor, using the reversed encoder weights.
Y = decode(z, decoder_dimensions, decoder_Ws)
In [86]:
# Static shape of the reconstruction tensor.
Y.get_shape().as_list()
Out[86]:
In [87]:
# Calculate some measure of loss, e.g. the pixel to pixel absolute difference or squared difference
# Here: element-wise squared error between the input and its reconstruction.
loss = tf.squared_difference(X, Y)
# Now sum over every pixel and then calculate the mean over the batch dimension (just like session 2!)
# hint, use tf.reduce_mean and tf.reduce_sum
# NOTE(review): this sums over every element, batch dimension included,
# instead of taking the mean over the batch as the instruction above
# describes; the training loop then divides the accumulated cost itself.
cost = tf.reduce_sum(loss)
In [88]:
# Adam optimizer minimizing the reconstruction cost; the learning rate is
# passed as the first positional argument.
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
In [89]:
# Display the default graph's GraphDef with the course's tboard helper.
# tboard is imported here because the import cell of this part of the
# notebook does not include it.
from libs import tboard
tboard.show_graph(tf.get_default_graph().as_graph_def())
In [91]:
# Create a tensorflow session and initialize all of our weights.
# `tf.global_variables_initializer` replaces the deprecated
# `tf.initialize_all_variables`.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Some parameters for training
batch_size = 100
n_epochs = 151
step = 10

# We'll try to reconstruct the same first 100 images and show how
# The network does over the course of training.
examples = ds.X[:100]

# We have to preprocess the images before feeding them to the network.
# I'll do this once here, so we don't have to do it every iteration.
test_examples = preprocess(examples, ds).reshape(-1, n_features)

# If we want to just visualize them, we can create a montage.
test_images = utils.montage(examples).astype(np.uint8)

# Store images so we can make a gif
gifs = []

# Now for our training:
for epoch_i in range(n_epochs):

    # Keep track of the cost
    this_cost = 0

    # Iterate over the entire dataset in batches
    for batch_X, _ in ds.train.next_batch(batch_size=batch_size):

        # Preprocess and reshape our current batch, batch_X:
        this_batch = preprocess(batch_X, ds).reshape(-1, n_features)

        # Compute the cost, and run the optimizer.
        this_cost += sess.run([cost, optimizer], feed_dict={X: this_batch})[0]

    # Average cost of this epoch
    avg_cost = this_cost / ds.X.shape[0] / batch_size
    print(epoch_i, avg_cost)

    # Let's also try to see how the network currently reconstructs the input.
    # We'll draw the reconstruction every `step` iterations.
    if epoch_i % step == 0:

        # Ask for the output of the network, Y, and give it our test examples
        recon = sess.run(Y, feed_dict={X: test_examples})

        # Resize the 2d to the 4d representation:
        rsz = recon.reshape(examples.shape)

        # We have to unprocess the image now, removing the normalization
        unnorm_img = deprocess(rsz, ds)

        # Clip to the 0-255 range before the uint8 cast below.
        clipped = np.clip(unnorm_img, 0, 255)

        # And we can create a montage of the reconstruction
        recon = utils.montage(clipped).astype(np.uint8)

        # Store for gif
        gifs.append(recon)

        fig, axs = plt.subplots(1, 2, figsize=(10, 10))
        axs[0].imshow(test_images)
        axs[0].set_title('Original')
        axs[1].imshow(recon)
        axs[1].set_title('Synthesis')
        fig.canvas.draw()
        plt.show()
In [92]:
# Show the originals next to the latest reconstruction once more ...
fig, axs = plt.subplots(1, 2, figsize=(10, 10))
axs[0].imshow(test_images)
axs[0].set_title('Original')
axs[1].imshow(recon)
axs[1].set_title('Synthesis')
fig.canvas.draw()
plt.show()
# ... and save both montages. The target directory is created first:
# plt.imsave does not create missing directories, so a fresh checkout
# without ./output would fail here.
os.makedirs('./output', exist_ok=True)
plt.imsave(arr=test_images, fname='./output/pokemon-test-4.png')
plt.imsave(arr=recon, fname='./output/pokemon-recon-4.png')
In [ ]:
# Run the encoder on the preprocessed test examples to get their latent codes.
zs = sess.run(z, feed_dict={X:test_examples})
In [51]:
# Shape of the latent codes: one row per test example.
zs.shape
Out[51]:
In [52]:
# Scatter the first two latent columns against each other.
# NOTE(review): any further latent columns (encoder_dimensions ends in 4)
# are not shown here.
plt.scatter(zs[:, 0], zs[:, 1])
Out[52]:
In [53]:
# Build a square grid of 2-D points with ceil(sqrt(n_images)) positions per
# side, spanning twice the smallest/largest latent value, and keep the first
# n_images points. Each row of `grid` is a (y, x) pair.
n_images = 100
idxs = np.linspace(np.min(zs) * 2.0, np.max(zs) * 2.0,
int(np.ceil(np.sqrt(n_images))))
xs, ys = np.meshgrid(idxs, idxs)
grid = np.dstack((ys, xs)).reshape(-1, 2)[:n_images,:]
In [54]:
# Side by side: the first two latent columns of the examples (left) and the
# regular grid they will be matched against (right).
fig, axs = plt.subplots(1,2,figsize=(8,3))
axs[0].scatter(zs[:, 0], zs[:, 1],
edgecolors='none', marker='o', s=2)
axs[0].set_title('Autoencoder Embedding')
axs[1].scatter(grid[:,0], grid[:,1],
edgecolors='none', marker='o', s=2)
axs[1].set_title('Ideal Grid')
Out[54]:
In [55]:
from scipy.spatial.distance import cdist
# Public entry point; `scipy.optimize._hungarian` is a private module.
from scipy.optimize import linear_sum_assignment

# Squared euclidean distance between every grid point (rows) and every
# latent code (columns). Only the first two latent columns are compared,
# matching the 2-D grid and the scatter plots; for a 2-unit latent layer
# this is the whole code.
# The matrix gets its own name so that it does not overwrite `cost`, the
# training cost tensor used by the training cell.
assignment_cost = cdist(grid, zs[:, :2], 'sqeuclidean')

# One-to-one assignment with minimal total distance; indexes[1][i] is the
# latent code assigned to grid point indexes[0][i].
indexes = linear_sum_assignment(assignment_cost)
In [56]:
# Inspect the assignment returned by linear_sum_assignment.
indexes
Out[56]:
In [59]:
# Smallest entry of the second index array of the assignment.
min(indexes[1])
Out[59]:
In [62]:
# Draw one red segment per grid point, joining it to the latent code that
# was assigned to it.
embedded = zs[:, :2]

# Axis limits must cover everything that is drawn. `zs[1]` is only the
# second example's code (a row, not a column), so limits taken from it
# would cut off most of the segments.
xmin = min(embedded.min(), grid.min())
xmax = max(embedded.max(), grid.max())

plt.figure(figsize=(5, 5))
for grid_i, z_i in enumerate(indexes[1]):
    plt.plot([embedded[z_i, 0], grid[grid_i, 0]],
             [embedded[z_i, 1], grid[grid_i, 1]], 'r')
plt.xlim([xmin, xmax])
plt.ylim([xmin, xmax])
Out[62]:
In [63]:
# Reorder the examples so that position k holds the example assigned to
# grid point k, then show and save the resulting montage.
examples_sorted = [examples[i] for i in indexes[1]]
plt.figure(figsize=(15, 15))
img = utils.montage(np.array(examples_sorted)).astype(np.uint8)
plt.imshow(img, interpolation='nearest')
plt.imsave(arr=img, fname='sorted.png')
In [69]:
# Vectorised replacement for a nested for loop: take 10 evenly spaced
# coordinates between -6000 and 6000 and pair every x with every y.
axis_values = np.linspace(-6000, 6000, 10)
grid_x, grid_y = np.meshgrid(axis_values, axis_values)
# The result is a 100 x 2 array with one (x, y) row per position of the
# 10 x 10 grid.
zs = np.column_stack((grid_x.ravel(), grid_y.ravel()))
In [70]:
# The grid in `zs` is 2-D, but the latent layer `z` has as many units as the
# last entry of encoder_dimensions. Place the grid in the leading latent
# columns and leave the remaining ones at 0 so the feed always matches the
# shape of `z`; for a 2-unit latent layer this feeds the grid unchanged.
n_latent = z.get_shape().as_list()[1]
n_used = min(zs.shape[1], n_latent)
z_feed = np.zeros((zs.shape[0], n_latent), dtype=np.float32)
z_feed[:, :n_used] = zs[:, :n_used]

# Decode every grid position, bypassing the encoder by feeding z directly.
recon = sess.run(Y, feed_dict={z: z_feed})
# reshape the result to an image:
rsz = recon.reshape(examples.shape)
# Deprocess the result, unnormalizing it
unnorm_img = deprocess(rsz, ds)
# clip to avoid saturation
clipped = np.clip(unnorm_img, 0, 255)
# Create a montage
img_i = utils.montage(clipped).astype(np.uint8)
In [71]:
# Show the montage decoded from the latent grid and save it next to the notebook.
plt.figure(figsize=(15, 15))
plt.imshow(img_i)
plt.imsave(arr=img_i, fname='manifold.png')
In [ ]: