In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys
from six.moves import cPickle as pickle
%matplotlib inline
In [2]:
# Load the pickled mini training set: a dict holding the image tensor under
# 'data' and the label table under 'outcome'.
# NOTE(review): pickle.load executes arbitrary code embedded in the file --
# only load pickles that were produced locally / come from a trusted source.
pickle_file = 'mini_train.pickle'
with open(pickle_file, 'rb') as handle:
    payload = pickle.load(handle)
mini_X, mini_outcome = payload['data'], payload['outcome']
# Drop the container reference so the garbage collector can reclaim it.
del payload
In [3]:
# batch_size: number of samples drawn per batch.
# num_unrollings: number of digit positions encoded per label.
batch_size, num_unrollings = 40, 5
class BatchGenerator(object):
    """Random mini-batch sampler pairing images with one-hot digit sequences.

    Every label is a sequence of digits. It is expanded once, up front, into
    `num_unrollings` one-hot rows over 11 classes: indices 0-9 for the digits
    and index 10 as the end marker that pads sequences shorter than
    `num_unrollings`. Digits beyond position `num_unrollings` are ignored.
    A label value outside 0..10 yields an all-zero row.

    Args:
        x_image: 4-D array of images, indexed as x_image[idx, :, :, :].
        y_labels: iterable of per-image digit sequences (each must support
            len() and integer indexing).
        batch_size: number of samples returned by each next_batch() call.
        num_unrollings: number of digit positions to encode per label.
        seed: optional seed for a private random generator, making the batch
            sequence reproducible. With the default (None) the global
            numpy.random state is used, exactly as before.

    Raises:
        ValueError: if there are fewer labels than images, since some sampled
            image indices would then have no label.
    """

    def __init__(self, x_image, y_labels, batch_size, num_unrollings, seed=None):
        if len(y_labels) < x_image.shape[0]:
            raise ValueError(
                'got %d labels for %d images; every image needs a label'
                % (len(y_labels), x_image.shape[0]))
        self._x_image = x_image
        self._y_labels = y_labels
        self._batch_size = batch_size
        self._num_unrollings = num_unrollings
        # The numpy.random module and a RandomState instance both expose
        # choice(), so either can serve as the sampling source.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self._y_digits = self._extract_digits()

    def _extract_digits(self):
        """Return a float32 array (num_unrollings, n_labels, 11) of one-hot digits."""
        end_digit = 10.0
        classes = np.arange(end_digit + 1)
        digits = np.zeros(
            (self._num_unrollings, len(self._y_labels), int(end_digit + 1)),
            dtype=np.float32)
        for pos in range(self._num_unrollings):
            # Digit at this position, or the end marker once the label ran out.
            codes = np.asarray([seq[pos] if len(seq) > pos else end_digit
                                for seq in self._y_labels])
            # Broadcast comparison turns each code into a one-hot row.
            digits[pos, :, :] = (classes == codes[:, None]).astype(np.float32)
        return digits

    def next_batch(self):
        """Sample `batch_size` indices (with replacement) and return the batch.

        Returns:
            (batch_x, batch_y): the selected images, and their one-hot digits
            with shape (num_unrollings, batch_size, 11).
        """
        idx = self._rng.choice(self._x_image.shape[0], self._batch_size)
        batch_x = self._x_image[idx, :, :, :]
        batch_y = self._y_digits[:, idx, :]
        return batch_x, batch_y
In [4]:
# Build the batch generator over the first 100 samples and draw one batch to
# sanity-check the resulting shapes.
mini_train_batches = BatchGenerator(mini_X[:100],
                                    mini_outcome['label'][:100],
                                    batch_size, num_unrollings)
batch_x, batch_y = mini_train_batches.next_batch()
# print() is called as a function so the cell runs on both Python 2 and 3,
# matching the print(...) calls used elsewhere in this notebook.
print(batch_y.shape)
print(batch_x.shape)
In [5]:
def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=[1, 2, 2, 1], padding='SAME')
In [32]:
# Image geometry is read from the data tensor: axis 1 gives the side length
# (used for both spatial dimensions of the input placeholder) and axis 3 the
# number of channels.
image_size, num_channels = mini_X.shape[1], mini_X.shape[3]
# Width of the CNN's fully connected layer.
CNN_num_nodes = 1024
# Size of the LSTM hidden/cell state; this should be large enough.
RNN_num_nodes = 1024
# 11 columns per digit position: the digits 0, 1, ..., 9 plus an ending
# marker <END>.
vocabulary_size = 11
# Computation graph: a two-layer CNN encodes each image, its projection seeds
# the state of an LSTM, and the LSTM is unrolled `num_unrollings` steps to
# predict the digit sequence one position at a time.
graph = tf.Graph()
with graph.as_default():
    # ---- CNN encoder ------------------------------------------------------
    # Input images; the batch dimension is fixed at graph-construction time.
    x_image = tf.placeholder(tf.float32, shape=(batch_size,
                                                image_size,
                                                image_size, num_channels))
    # Conv layer 1: 5x5 kernels, 32 feature maps, ReLU, then 2x2 max-pool.
    W_conv1 = weight_variable([5, 5, num_channels, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    # Conv layer 2: 5x5 kernels, 64 feature maps, ReLU, then 2x2 max-pool.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)
    # Fully connected layer. The flattened feature size is read from the
    # static shape of the pooled tensor rather than hard-coded as 16*16*64,
    # which was only valid for 64x64 inputs.
    flat_size = int(np.prod(h_pool2.get_shape().as_list()[1:]))
    W_fc1 = weight_variable([flat_size, CNN_num_nodes])
    b_fc1 = bias_variable([CNN_num_nodes])
    h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # ---- LSTM parameters --------------------------------------------------
    # Input gate: input, previous output, and bias.
    ix = weight_variable([vocabulary_size, RNN_num_nodes])
    im = weight_variable([RNN_num_nodes, RNN_num_nodes])
    ib = bias_variable([RNN_num_nodes])
    # Forget gate: input, previous output, and bias.
    fx = weight_variable([vocabulary_size, RNN_num_nodes])
    fm = weight_variable([RNN_num_nodes, RNN_num_nodes])
    fb = bias_variable([RNN_num_nodes])
    # Memory cell: input, state and bias.
    cx = weight_variable([vocabulary_size, RNN_num_nodes])
    cm = weight_variable([RNN_num_nodes, RNN_num_nodes])
    cb = bias_variable([RNN_num_nodes])
    # Output gate: input, previous output, and bias.
    ox = weight_variable([vocabulary_size, RNN_num_nodes])
    om = weight_variable([RNN_num_nodes, RNN_num_nodes])
    ob = bias_variable([RNN_num_nodes])

    def lstm_cell(i, o, state):
        """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf

        Note that in this formulation, we omit the various connections between
        the previous state and the gates.

        Args:
            i: input for this time step.
            o: hidden state (output) of the previous time step.
            state: cell state of the previous time step.

        Returns:
            (new hidden state, new cell state).
        """
        input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)
        forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)
        update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
        state = forget_gate * state + input_gate * tf.tanh(update)
        output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)
        return output_gate * tf.tanh(state), state

    # ---- Digit placeholders -----------------------------------------------
    # One placeholder per time step plus one: element 0 is the start input,
    # elements 1..num_unrollings hold the one-hot target digits.
    digits_data = [
        tf.placeholder(tf.float32, shape=[batch_size, vocabulary_size])
        for _ in range(num_unrollings + 1)]
    digits_inputs = digits_data[:num_unrollings]
    digits_labels = digits_data[1:]  # labels are inputs shifted by one time step.

    # ---- Connect the CNN to the LSTM --------------------------------------
    W_CNN = weight_variable([CNN_num_nodes, RNN_num_nodes])
    b_CNN = bias_variable([RNN_num_nodes])
    CNN_output = tf.matmul(h_fc1, W_CNN) + b_CNN
    # The LSTM starts every batch from the CNN projection of *that* batch.
    # Batches are independent random samples of images, so no recurrent state
    # is carried over from one batch to the next (doing so would leak the
    # final state of unrelated images into the current decode).
    output = CNN_output
    state = CNN_output

    # Unrolled LSTM loop.
    outputs = list()
    for digit_input in digits_inputs:
        output, state = lstm_cell(digit_input, output, state)
        outputs.append(output)

    # ---- Classifier and loss ----------------------------------------------
    w_fc_rnn = weight_variable([RNN_num_nodes, vocabulary_size])
    b_fc_rnn = bias_variable([vocabulary_size])
    # All time steps are stacked along the batch axis and classified at once.
    logits = tf.nn.xw_plus_b(tf.concat(0, outputs), w_fc_rnn, b_fc_rnn)
    # Keyword arguments make the logits/labels roles explicit.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.concat(0, digits_labels)))

    # ---- Optimizer: learning-rate decay and gradient clipping -------------
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    # Staircase decay: the rate is multiplied by 0.5 every 100 steps.
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step, 100, 0.5,
                                               staircase=True)
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    # Split the (gradient, variable) pairs so the gradients can be clipped.
    gradients, v = zip(*sgd.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    # `optimizer` is the training op that the training loop runs.
    optimizer = sgd.apply_gradients(list(zip(gradients, v)),
                                    global_step=global_step)

    # ---- Predictions and metrics ------------------------------------------
    train_prediction = tf.nn.softmax(logits)
    # Prediction accuracy for the first digit only.
    first_digit_logits = tf.nn.xw_plus_b(outputs[0], w_fc_rnn, b_fc_rnn)
    correct_prediction = tf.equal(tf.argmax(first_digit_logits, 1),
                                  tf.argmax(digits_labels[0], 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
In [33]:
# Total number of training steps, and how often (in steps) to print a summary.
num_steps = 1000
summary_frequency = 20

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Running sums since the last summary; turned into means when reporting.
    mean_loss = mean_accuracy = 0
    for step in range(num_steps):
        batch_x, batch_y = mini_train_batches.next_batch()
        # The first LSTM input is an all-zero vector; the remaining
        # placeholders receive the one-hot digits of each position.
        feed_dict = {
            x_image: batch_x,
            digits_data[0]: np.zeros([batch_y.shape[1], batch_y.shape[2]]),
        }
        for pos in range(num_unrollings):
            feed_dict[digits_data[pos + 1]] = batch_y[pos]

        _, l, lr = session.run(
            [optimizer, loss, learning_rate], feed_dict=feed_dict)
        mean_loss += l
        # First-digit accuracy is evaluated in a separate run, after the update.
        mean_accuracy += session.run(accuracy, feed_dict=feed_dict)

        # Only report every `summary_frequency` steps.
        if step % summary_frequency != 0:
            continue
        if step > 0:
            # At step 0 the sums hold a single batch, so no averaging is needed.
            mean_loss = mean_loss / summary_frequency
            mean_accuracy = mean_accuracy / summary_frequency
        # The mean loss is an estimate of the loss over the last few batches.
        print('Average loss at step %d: %f learning rate: %f' % (step, mean_loss, lr))
        print("step %d, training accuracy %g"%(step, mean_accuracy))
        mean_loss = 0
        mean_accuracy = 0
In [ ]:
In [ ]: