In [ ]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
In [ ]:
import tensorflow as tf
assert tf.__version__=="1.3.0" # the version we used
from tensorflow.contrib.tensorboard.plugins import projector # for visualizing embeddings
import numpy as np # computation
import os # create dirs
from os.path import join as jp # join paths
import logging # print info
import sklearn # datasets, clustering
from sklearn.datasets import fetch_mldata
from IPython.display import Image # displaying images inline
import matplotlib # plotting stuff
matplotlib.use('Agg') # non-interactive backend so plots can be saved without a display
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import time # estimating ETA
In [ ]:
# fix random seeds so training is reproducible
RANDOM_SEED = 0
# configure numpy
np.set_printoptions(precision=3, suppress=True)
np.random.seed(RANDOM_SEED)
# configure tensorflow
tf.set_random_seed(RANDOM_SEED)
# configure
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
In [ ]:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)
        logger.info("Created directory: %s" % path)

def plot_reconstruction(samples, epoch=20, size_x=3, size_y=3, name="reconstruction"):
    if size_x * size_y != len(samples):  # fall back to a roughly square grid
        l = min(len(samples), 200)
        size_x = int(np.sqrt(l))
        size_y = int(np.sqrt(l))
        samples = samples[0:(size_x * size_y)]
    fig = plt.figure(figsize=(size_x, size_y))
    gs = gridspec.GridSpec(size_x, size_y)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
    outfile = "visualizations/%s_%0.4d.png" % (name, epoch)
    plt.savefig(outfile)
    plt.close(fig)
    try:  # displaying inline only works in the IPython notebook
        display(Image(filename=outfile))
    except Exception:
        pass

def link_embedding_to_metadata(embedding_var, metadata_file, graph_dir):
    from tensorflow.contrib.tensorboard.plugins import projector # for visualizing embeddings
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embedding_var.name.replace(":0", "")
    embedding.metadata_path = metadata_file
    summary_writer = tf.summary.FileWriter(graph_dir)
    projector.visualize_embeddings(summary_writer, config)
In [ ]:
MODEL_NAME = "mlp-sdae"
DATA_DIR = "data"
VISUALIZATIONS_DIR = "visualizations"
create_dir(DATA_DIR) # data is stored here
create_dir(VISUALIZATIONS_DIR) # plots we generate
create_dir("graphs")
TAG = "%0.3d"%(len(os.listdir("graphs"))+1)
GRAPH_DIR = jp("graphs", "%s-%s"%(MODEL_NAME, TAG))
create_dir(GRAPH_DIR) # store tensorflow graph here
In [ ]:
BATCH_SIZE = 200
NUM_FEATURES = 784 # pixels per MNIST image (28*28)
NUM_ENCODING_DIMENSIONS = 10 # encoded space dimensions
NUM_PRETRAIN_STEPS = 700 # training steps per layer stack
NUM_FINETUNE_STEPS = 1400 # finetuning steps for the whole SDAE
DECREASE_LR_STEPS = 350 # after how many steps the learning rate is decreased (here: once per epoch)
NUM_DISPLAY_LOSS_STEPS = max(int( NUM_PRETRAIN_STEPS / 50),1) # every two percent of the steps
In [ ]:
mnist = fetch_mldata('MNIST original', data_home=DATA_DIR)
p = np.random.permutation(mnist.data.shape[0]) # shuffle data because it is ordered
X = mnist.data[p].astype(np.float32)*0.02 # scale raw pixel values (0..255 -> 0..5.1)
Y = mnist.target[p]
logger.info(p.shape)
NUM_IMAGES = mnist.data.shape[0]
The model is divided into two parts: first the four stacked layer pairs defined in Section 3.2, and then the whole Stacked Denoising Auto-Encoder in Section 3.3.
The weights of the layers defined in Section 3.2 are reused in the full SDAE in Section 3.3.
In this model, $x$ is our input image, $\hat{x}$ is our reconstructed image, and $h$ is the encoded representation. E1..E4 are the fully connected encoder layers, D1..D4 are the fully connected decoder layers. The architecture of the whole stacked denoising autoencoder looks like this:
$x$ => E1 => E2 => E3 => E4 => $h$ => D1 => D2 => D3 => D4 => $\hat{x}$
The first layer stack looks like this:
$x$ => E1 => $h_1$ => DO (dropout) => D4 => $\hat{x}$
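For orientation, here is a minimal sketch (not one of the notebook's own cells) of the layer widths used by the dense layers defined below; the names ENCODER_UNITS and DECODER_UNITS are illustrative only and are not referenced anywhere else:
# Layer widths of the full SDAE, matching the tf.layers.dense calls below:
#   x (784) => E1 (500) => E2 (500) => E3 (2000) => E4 (10) = h
#   h (10)  => D1 (2000) => D2 (500) => D3 (500) => D4 (784) = x_hat
ENCODER_UNITS = [500, 500, 2000, 10]   # E1..E4; E4 outputs NUM_ENCODING_DIMENSIONS
DECODER_UNITS = [2000, 500, 500, 784]  # D1..D4; D4 outputs NUM_FEATURES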
In [ ]:
x_input = tf.placeholder(shape=[None, NUM_FEATURES], dtype=tf.float32, name="X_INPUT")
tf_apply_dropout = tf.placeholder_with_default(True, shape=(), name="APPLY_DROPOUT")
In [ ]:
layer_stacks = {
0:{'name':'Layer_1'},
1:{'name':'Layer_2'},
2:{'name':'Layer_3'},
3:{'name':'Layer_4'},
4:{'name':'Stacked AE'},
}
In [ ]:
with tf.variable_scope("Encoder"):
    s1_enc_l1_act = tf.layers.dense(
        inputs=x_input,
        units=500,
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_1",
    )
with tf.variable_scope("Decoder"):
    s1_dec_noisy_input = tf.layers.dropout(inputs=s1_enc_l1_act, rate=0.2, seed=RANDOM_SEED, training=tf_apply_dropout)
    s1_dec_l4_act = tf.layers.dense(
        inputs=s1_dec_noisy_input,
        units=NUM_FEATURES,
        activation=None,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_4",
    )
In [ ]:
with tf.variable_scope("Encoder"):
    s2_enc_l2_act = tf.layers.dense(
        inputs=tf.stop_gradient(s1_dec_noisy_input),
        units=500,
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_2",
    )
with tf.variable_scope("Decoder"):
    s2_dec_noisy_input = tf.layers.dropout(inputs=s2_enc_l2_act, rate=0.2, seed=RANDOM_SEED, training=tf_apply_dropout)
    s2_dec_l3_act = tf.layers.dense(
        inputs=s2_dec_noisy_input,
        units=500,
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_3",
    )
In [ ]:
with tf.variable_scope("Encoder"):
    s3_enc_l3_act = tf.layers.dense(
        inputs=tf.stop_gradient(s2_dec_noisy_input),
        units=2000,
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_3",
    )
with tf.variable_scope("Decoder"):
    s3_dec_noisy_input = tf.layers.dropout(inputs=s3_enc_l3_act, rate=0.2, seed=RANDOM_SEED, training=tf_apply_dropout)
    s3_dec_l2_act = tf.layers.dense(
        inputs=s3_dec_noisy_input,
        units=500,
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_2",
    )
In [ ]:
with tf.variable_scope("Encoder"):
    s4_enc_l4_act = tf.layers.dense(
        inputs=tf.stop_gradient(s3_dec_noisy_input),
        units=NUM_ENCODING_DIMENSIONS,
        activation=None,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_4",
    )
with tf.variable_scope("Decoder"):
    s4_dec_noisy_input = tf.layers.dropout(inputs=s4_enc_l4_act, rate=0.2, seed=RANDOM_SEED, training=tf_apply_dropout)
    s4_dec_l1_act = tf.layers.dense(
        inputs=s4_dec_noisy_input,
        units=2000,
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(seed=RANDOM_SEED),
        name="FC_1",
    )
In [ ]:
with tf.variable_scope("Encoder"):
    enc_l1_act = tf.layers.dense( inputs=x_input, units=500 , activation=tf.nn.relu, name="FC_1", reuse=True )
    enc_l2_act = tf.layers.dense( inputs=enc_l1_act, units=500 , activation=tf.nn.relu, name="FC_2", reuse=True )
    enc_l3_act = tf.layers.dense( inputs=enc_l2_act, units=2000, activation=tf.nn.relu, name="FC_3", reuse=True )
    enc_l4_act = tf.layers.dense( inputs=enc_l3_act, units=NUM_ENCODING_DIMENSIONS , activation=None , name="FC_4", reuse=True )
    h = tf.identity(enc_l4_act, "embedded_x")
with tf.variable_scope("Decoder"):
    dec_l1_act = tf.layers.dense( inputs=h, units=2000, activation=tf.nn.relu, name="FC_1", reuse=True )
    dec_l2_act = tf.layers.dense( inputs=dec_l1_act, units=500 , activation=tf.nn.relu, name="FC_2", reuse=True )
    dec_l3_act = tf.layers.dense( inputs=dec_l2_act, units=500 , activation=tf.nn.relu, name="FC_3", reuse=True )
    dec_l4_act = tf.layers.dense( inputs=dec_l3_act, units=NUM_FEATURES , activation=None , name="FC_4", reuse=True )
    x_reconstructed = tf.identity(dec_l4_act, "reconstructed_x")
In [ ]:
stack_1_loss_op = tf.losses.mean_squared_error(
predictions=s1_dec_l4_act,
labels=x_input
)
stack_2_loss_op = tf.losses.mean_squared_error(
predictions=s2_dec_l3_act,
labels=tf.stop_gradient(s1_enc_l1_act)
)
stack_3_loss_op = tf.losses.mean_squared_error(
predictions=s3_dec_l2_act,
labels=tf.stop_gradient(s2_enc_l2_act)
)
stack_4_loss_op = tf.losses.mean_squared_error(
predictions=s4_dec_l1_act,
labels=tf.stop_gradient(s3_enc_l3_act)
)
In [ ]:
sae_loss_op = tf.losses.mean_squared_error(
predictions=x_reconstructed,
labels=x_input
)
In [ ]:
with tf.variable_scope("ImageEncodings"):
    encoded_images = tf.get_variable( name="encoded_images", shape=[NUM_IMAGES, NUM_ENCODING_DIMENSIONS], dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False )
    tf_batch_idx = tf.Variable(0, trainable=False)
    update_indices = tf.range(tf_batch_idx*BATCH_SIZE, tf_batch_idx*BATCH_SIZE+BATCH_SIZE)
    update_indices = tf.reshape(update_indices, shape=[-1, 1])
    encode_images_op = tf.scatter_nd_update(ref=encoded_images, indices=update_indices, updates=h, name="encode_images_op")
In [ ]:
tf_learning_rate = tf.Variable(0.1, trainable=False, name="LR")
new_lr = tf.placeholder(shape=(), dtype=tf.float32)
set_learning_rate_op = tf_learning_rate.assign(new_lr)
In [ ]:
sgd = tf.train.GradientDescentOptimizer(learning_rate=tf_learning_rate)
stack_1_trainstep_op = sgd.minimize(stack_1_loss_op)
stack_2_trainstep_op = sgd.minimize(stack_2_loss_op)
stack_3_trainstep_op = sgd.minimize(stack_3_loss_op)
stack_4_trainstep_op = sgd.minimize(stack_4_loss_op)
sae_trainstep_op = sgd.minimize(sae_loss_op)
In [ ]:
s1_loss_summary = tf.summary.scalar("Stack_1_Loss",tf.cast(stack_1_loss_op, tf.float32)) # summary for reconstruction loss
s2_loss_summary = tf.summary.scalar("Stack_2_Loss",tf.cast(stack_2_loss_op, tf.float32)) # summary for reconstruction loss
s3_loss_summary = tf.summary.scalar("Stack_3_Loss",tf.cast(stack_3_loss_op, tf.float32)) # summary for reconstruction loss
s4_loss_summary = tf.summary.scalar("Stack_4_Loss",tf.cast(stack_4_loss_op, tf.float32)) # summary for reconstruction loss
sae_loss_summary = tf.summary.scalar("StackedAE_Loss",tf.cast(sae_loss_op, tf.float32)) # summary for reconstruction loss
# https://www.tensorflow.org/api_docs/python/tf/summary/merge
In [ ]:
## attach the training ops, loss ops and loss summaries to each layer stack
layer_stacks[0].update({
"trainstep":stack_1_trainstep_op,
"loss_op":stack_1_loss_op,
"loss_summary":s1_loss_summary
})
layer_stacks[1].update({
"trainstep":stack_2_trainstep_op,
"loss_op":stack_2_loss_op,
"loss_summary":s2_loss_summary
})
layer_stacks[2].update({
"trainstep":stack_3_trainstep_op,
"loss_op":stack_3_loss_op,
"loss_summary":s3_loss_summary
})
layer_stacks[3].update({
"trainstep":stack_4_trainstep_op,
"loss_op":stack_4_loss_op,
"loss_summary":s4_loss_summary
})
layer_stacks[4].update({
"trainstep":sae_trainstep_op,
"loss_op":sae_loss_op,
"loss_summary":sae_loss_summary
})
In [ ]:
saver = tf.train.Saver(tf.global_variables()) # Saver
session = tf.Session()
session.run(tf.global_variables_initializer())
summary_writer = tf.summary.FileWriter(GRAPH_DIR)
summary_writer.add_graph(session.graph)
tf.trainable_variables()
In [ ]:
def abs_sum_diff(w1, w2):
    return np.sum(np.abs(w1 - w2))

def print_weight_change(func):
    import time
    def decorated(*args, **kwargs):
        trainable_weights_start = session.run([v for v in tf.trainable_variables() if "kernel" in v.name])
        result = func(*args, **kwargs)
        trainable_weights = session.run([v for v in tf.trainable_variables() if "kernel" in v.name])
        weight_names = [v.name for v in tf.trainable_variables() if "kernel" in v.name]
        tw_pairs = zip(weight_names, trainable_weights_start, trainable_weights)
        logger.info("Weights that changed:")
        for tw_name, tw_start, tw in tw_pairs:
            logger.info("%s: delta: %.3f" % (tw_name, abs_sum_diff(tw_start, tw)))
        logger.info("")
        return result
    return decorated

@print_weight_change
def train(stack, num_steps):
    logger.info("Started training %s" % stack["name"])
    logger.info("Decreasing LR every %i steps" % DECREASE_LR_STEPS)
    session.run(set_learning_rate_op, feed_dict={new_lr: 0.1})
    start_time = time.time()
    for step in range(num_steps):
        # decrease the learning rate every DECREASE_LR_STEPS steps
        if (step+1) % DECREASE_LR_STEPS == 0:
            current_lr = session.run(tf_learning_rate)
            decreased_lr = current_lr / 10
            session.run(set_learning_rate_op, feed_dict={new_lr: decreased_lr})
            logger.info("%s: Decreasing learning_rate from %0.5f to %0.5f" % (stack["name"], current_lr, decreased_lr))
        # get batch data (rotate over the training data; NUM_IMAGES is divisible by BATCH_SIZE)
        start_index = (step*BATCH_SIZE) % NUM_IMAGES
        end_index = start_index + BATCH_SIZE
        feed_dict = {x_input: X[start_index:end_index]}
        # execute training step
        _, loss, summary = session.run([stack["trainstep"], stack["loss_op"], stack["loss_summary"]], feed_dict=feed_dict)
        summary_writer.add_summary(summary, step)
        # print status every now and then
        if step % NUM_DISPLAY_LOSS_STEPS == 0:
            step_time = (time.time() - start_time) / (step+1) / 60.0  # time per step in minutes
            eta = (num_steps - step) * step_time  # ETA = steps left * time per step
            logger.info("%s, Step %i/%i, Training loss %0.5f, ETA ~%0.2fm" % (stack["name"], step, num_steps, loss, eta))
    saver.save(session, jp(GRAPH_DIR, MODEL_NAME), global_step=0)
    logger.info("Finished training of %s" % stack["name"])
In [ ]:
s1a, s1do = session.run([s1_enc_l1_act,s1_dec_noisy_input], feed_dict= {x_input:X[5*BATCH_SIZE:6*BATCH_SIZE]})
print("Measured dropout keep rate: %0.3f"%(np.count_nonzero(s1do) / np.count_nonzero(s1a) ) )
In [ ]:
train(layer_stacks[0], NUM_PRETRAIN_STEPS)
train(layer_stacks[1], NUM_PRETRAIN_STEPS)
train(layer_stacks[2], NUM_PRETRAIN_STEPS)
train(layer_stacks[3], NUM_PRETRAIN_STEPS)
In [ ]:
train(layer_stacks[4], NUM_FINETUNE_STEPS)
In [ ]:
print("Original")
sample_images = X[0:50]
plot_reconstruction(sample_images, name="original")
print("Reconstructed")
reconstructed_samples = session.run(x_reconstructed, feed_dict={x_input:sample_images})
plot_reconstruction(reconstructed_samples, name="reconstructed")
In [ ]:
for i in range(0, NUM_IMAGES, BATCH_SIZE):
    session.run(encode_images_op, feed_dict={x_input: X[i:i+BATCH_SIZE], tf_batch_idx: int(i/BATCH_SIZE)})
saver.save(session, jp(GRAPH_DIR, MODEL_NAME), global_step=0)
logger.info("Encoded %i images"%NUM_IMAGES)
In [ ]:
# write metadata (one label per encoded image) for the TensorBoard projector
IMAGES_EMBEDDINGS_METADATA_FN = "image_embeddings.tsv"
with open(jp(GRAPH_DIR, IMAGES_EMBEDDINGS_METADATA_FN), "w") as mdf:
    for label in Y:
        mdf.write("%s\n" % label)
link_embedding_to_metadata(
    embedding_var=encoded_images,
    metadata_file=IMAGES_EMBEDDINGS_METADATA_FN,
    graph_dir=GRAPH_DIR
)
# => you can now look at the encoded images by starting TensorBoard with 'tensorboard --logdir=graphs/'
In [ ]: