In [1]:
# import packages
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell

import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline


In [21]:
'''
To classify images with a bidirectional recurrent neural network, we treat
every image row as one step in a sequence of pixels. Because each MNIST image
is 28x28 px, every sample becomes a sequence of 28 steps with 28 inputs each.
'''

# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10

# Network Parameters
n_input = 28 # MNIST data input (img shape: 28*28)
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 10 # MNIST total classes (0-9 digits)
image_to_display = 20
validation_size = 128
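
In [ ]:
# A quick illustration of the row-as-sequence idea from the note above
# (sketch): one flattened 784-pixel image becomes n_steps rows of
# n_input pixels each.
print(np.zeros(n_steps * n_input).reshape(n_steps, n_input).shape)   # (28, 28)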

In [6]:
# Import MNIST data
dataset = pd.read_csv("../input/train.csv")
test = pd.read_csv("../input/test.csv")

In [7]:
print('dataset({0[0]},{0[1]})'.format(dataset.shape))
print('test({0[0]},{0[1]})'.format(test.shape))
print(dataset.head())
print(test.head())


dataset(42000,785)
test(28000,784)
   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  pixel9  pixel10  pixel11  pixel12  pixel13  pixel14  pixel15  \
0       0       0        0        0        0        0        0        0   
1       0       0        0        0        0        0        0        0   
2       0       0        0        0        0        0        0        0   
3       0       0        0        0        0        0        0        0   
4       0       0        0        0        0        0        0        0   

   pixel16  pixel17  pixel18      
0        0        0        0 ...  
1        0        0        0 ...  
2        0        0        0 ...  
3        0        0        0 ...  
4        0        0        0 ...  

[5 rows x 785 columns]
   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0       0       0       0       0       0       0       0       0       0   
1       0       0       0       0       0       0       0       0       0   
2       0       0       0       0       0       0       0       0       0   
3       0       0       0       0       0       0       0       0       0   
4       0       0       0       0       0       0       0       0       0   

   pixel9  pixel10  pixel11  pixel12  pixel13  pixel14  pixel15  pixel16  \
0       0        0        0        0        0        0        0        0   
1       0        0        0        0        0        0        0        0   
2       0        0        0        0        0        0        0        0   
3       0        0        0        0        0        0        0        0   
4       0        0        0        0        0        0        0        0   

   pixel17  pixel18  pixel19      
0        0        0        0 ...  
1        0        0        0 ...  
2        0        0        0 ...  
3        0        0        0 ...  
4        0        0        0 ...  

[5 rows x 784 columns]

In [8]:
labels = dataset.iloc[:,0]
images = dataset.iloc[:,1:].values
images = images.astype(np.float32)

# convert from [0:255] => [0.0:1.0]
images = np.multiply(images, 1.0 / 255.0)

print('images({0[0]},{0[1]})'.format(images.shape))


images(42000,784)

In [10]:
def display(img):
    one_image = img.reshape(28, 28)
    plt.axis('off')
    plt.imshow(one_image, cmap=cm.binary)
display(images[image_to_display])


[image output: 28x28 grayscale plot of the digit at index image_to_display]

In [14]:
labels[image_to_display]


Out[14]:
8

In [16]:
# extract the flat label vector, then report the label of image-to-display
# and the number of distinct labels
labels_flat = dataset.iloc[:, 0].values.ravel()
print('labels_flat({0})'.format(len(labels_flat)))
print('label of image [{0}] => {1}'.format(image_to_display, labels_flat[image_to_display]))

labels_count = np.unique(labels_flat).shape[0]
print('number of labels => {0}'.format(labels_count))


labels_flat(42000)
label of image [20] => 8
number of labels => 10

In [18]:
# convert class labels from scalars to one-hot vectors
# 0 => [1 0 0 0 0 0 0 0 0 0]
# 1 => [0 1 0 0 0 0 0 0 0 0]
# ...
# 9 => [0 0 0 0 0 0 0 0 0 1]

def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    print(num_labels)
    index_offset = np.arange(num_labels) * num_classes
    print(index_offset)
    labels_one_hot = np.zeros((num_labels, num_classes))
    print(labels_one_hot)
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

labels = dense_to_one_hot(labels_flat, labels_count)
labels = labels.astype(np.uint8)

print('labels({0[0]},{0[1]})'.format(labels.shape))
print ('labels vector for image [{0}] => {1}'.format(image_to_display,labels[image_to_display]))


42000
[     0     10     20 ..., 419970 419980 419990]
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
labels(42000,10)
labels vector for image [20] => [0 0 0 0 0 0 0 0 1 0]
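
In [ ]:
# Mini check of dense_to_one_hot on a toy label array (sketch; the debug
# prints inside the function fire as well):
print(dense_to_one_hot(np.array([0, 2, 1]), 3).astype(np.uint8))
# expected:
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]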

In [22]:
# split data into training & validation sets
validation_images = images[:validation_size]
validation_labels = labels[:validation_size]

train_images = images[validation_size:]
train_labels = labels[validation_size:]
train_labels_flat = labels_flat[validation_size:]


print('train data size({0[0]},{0[1]})'.format(train_images.shape))
print('validation data size({0[0]},{0[1]})'.format(validation_images.shape))


train data size(41872,784)
validation data size(128,784)

In [11]:
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])

# Define weights
weights = {
    # Hidden layer weights => 2*n_hidden because of forward + backward cells
    'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}

In [12]:
def BiRNN(x, weights, biases):

    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                              dtype=tf.float32)
    except Exception: # Old TensorFlow version only returns outputs not states
        outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                        dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
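
In [ ]:
# Shape walk-through of the preprocessing inside BiRNN, sketched in numpy
# (illustrative toy batch of 2 instead of 128):
a = np.zeros((2, n_steps, n_input))    # (batch_size, n_steps, n_input)
a = a.transpose(1, 0, 2)               # (n_steps, batch_size, n_input)
a = a.reshape(-1, n_input)             # (n_steps*batch_size, n_input)
rows = np.split(a, n_steps, axis=0)    # list of n_steps (batch_size, n_input) arrays
print(len(rows), rows[0].shape)        # 28 (2, 28)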

In [13]:
pred = BiRNN(x, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()


WARNING:tensorflow:<tensorflow.python.ops.rnn_cell.BasicLSTMCell object at 0x1062b6e50>: Using a concatenated state is slower and will soon be deprecated.  Use state_is_tuple=True.
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell.BasicLSTMCell object at 0x1062b6e50>: Using a concatenated state is slower and will soon be deprecated.  Use state_is_tuple=True.
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell.BasicLSTMCell object at 0x1062b6b10>: Using a concatenated state is slower and will soon be deprecated.  Use state_is_tuple=True.
WARNING:tensorflow:<tensorflow.python.ops.rnn_cell.BasicLSTMCell object at 0x1062b6b10>: Using a concatenated state is slower and will soon be deprecated.  Use state_is_tuple=True.
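
In [ ]:
# Note: the deprecation warnings above can be avoided by constructing the
# LSTM cells with state_is_tuple=True, e.g. (a sketch for the TF versions
# that emit this warning):
#     rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)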

In [19]:
# A stratified shuffle is used instead of a simple shuffle to balance the
# sample, i.e. to interleave examples from each of the 10 classes.
# Since the MNIST classes do not contain exactly the same number of examples,
# a simple shuffle may be used instead.

def stratified_shuffle(labels, num_classes):
    ix = np.argsort(labels).reshape((num_classes,-1))
    for i in range(len(ix)):
        np.random.shuffle(ix[i])
    return ix.T.reshape((-1))
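
In [ ]:
# Toy illustration of stratified_shuffle (sketch): the reshape above assumes
# an equal number of examples per class, and the transpose interleaves one
# index from every class in the result.
toy_labels = np.array([0, 1, 0, 1, 0, 1])
print(stratified_shuffle(toy_labels, 2))   # e.g. [4 3 0 5 2 1] - classes alternate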

In [25]:
# Training
epochs_completed = 0
index_in_epoch = 0
num_examples = train_images.shape[0]

# visualisation variables
train_accuracies = []
validation_accuracies = []
x_range = []

# serve data by batches
def next_batch(batch_size):
    
    global train_images
    global train_labels
    global train_labels_flat
    global index_in_epoch
    global epochs_completed
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    # once all training data have been used, reshuffle them randomly
    if index_in_epoch > num_examples:
        # finished epoch
        epochs_completed += 1
        # shuffle the data
        perm = np.arange(num_examples)
        np.random.shuffle(perm)
        #perm = stratified_shuffle(train_labels_flat, 10)
        train_images = train_images[perm]
        train_labels = train_labels[perm]
        train_labels_flat = train_labels_flat[perm]
        # start next epoch
        start = 0
        index_in_epoch = batch_size
        assert batch_size <= num_examples
    end = index_in_epoch
    return train_images[start:end], train_labels[start:end]
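
In [ ]:
# Sanity check of the batch server (sketch; this advances index_in_epoch,
# so the counter is reset afterwards):
bx, by = next_batch(batch_size)
print(bx.shape, by.shape)   # (128, 784) (128, 10)
index_in_epoch = 0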

In [26]:
# Launch the graph
saver = tf.train.Saver(max_to_keep=5)
sess = tf.Session()
sess.run(init)

# restore the latest checkpoint, if any, so training can resume across runs
ckpt = tf.train.get_checkpoint_state('.')
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Restored session from: %s" % ckpt.model_checkpoint_path)
else:
    print("No checkpoint found.")

step = 1
# Keep training until the iteration budget is reached
while step * batch_size < training_iters:
    batch_x, batch_y = next_batch(batch_size)
    # Reshape data to get 28 sequences of 28 elements
    batch_x = batch_x.reshape((batch_size, n_steps, n_input))
    # Run optimization op (backprop)
    sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
    if step % display_step == 0:
        # Calculate batch accuracy
        acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        # Calculate batch loss
        loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
        print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
              "{:.6f}".format(loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))
    step += 1
print("Optimization Finished!")

# evaluate on the held-out validation set and checkpoint the model
test_data = validation_images.reshape((-1, n_steps, n_input))
test_label = validation_labels
print("Testing Accuracy:", \
    sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
saver.save(sess, 'model', global_step=step)


Iter 1280, Minibatch Loss= 1.755664, Training Accuracy= 0.42969
Iter 2560, Minibatch Loss= 1.212868, Training Accuracy= 0.62500
Iter 3840, Minibatch Loss= 0.904883, Training Accuracy= 0.68750
Iter 5120, Minibatch Loss= 0.811659, Training Accuracy= 0.73438
Iter 6400, Minibatch Loss= 0.635554, Training Accuracy= 0.78125
Iter 7680, Minibatch Loss= 0.610600, Training Accuracy= 0.81250
Iter 8960, Minibatch Loss= 0.628577, Training Accuracy= 0.82812
Iter 10240, Minibatch Loss= 0.525954, Training Accuracy= 0.83594
Iter 11520, Minibatch Loss= 0.320729, Training Accuracy= 0.89844
Iter 12800, Minibatch Loss= 0.457968, Training Accuracy= 0.83594
Iter 14080, Minibatch Loss= 0.287802, Training Accuracy= 0.89062
Iter 15360, Minibatch Loss= 0.270891, Training Accuracy= 0.93750
Iter 16640, Minibatch Loss= 0.316820, Training Accuracy= 0.89844
Iter 17920, Minibatch Loss= 0.298229, Training Accuracy= 0.90625
Iter 19200, Minibatch Loss= 0.327001, Training Accuracy= 0.85938
Iter 20480, Minibatch Loss= 0.300310, Training Accuracy= 0.92188
Iter 21760, Minibatch Loss= 0.288746, Training Accuracy= 0.91406
Iter 23040, Minibatch Loss= 0.251030, Training Accuracy= 0.94531
Iter 24320, Minibatch Loss= 0.188075, Training Accuracy= 0.92969
Iter 25600, Minibatch Loss= 0.212189, Training Accuracy= 0.92969
Iter 26880, Minibatch Loss= 0.191988, Training Accuracy= 0.91406
Iter 28160, Minibatch Loss= 0.174374, Training Accuracy= 0.93750
Iter 29440, Minibatch Loss= 0.231610, Training Accuracy= 0.91406
Iter 30720, Minibatch Loss= 0.216877, Training Accuracy= 0.92188
Iter 32000, Minibatch Loss= 0.191306, Training Accuracy= 0.92969
Iter 33280, Minibatch Loss= 0.219689, Training Accuracy= 0.92969
Iter 34560, Minibatch Loss= 0.173724, Training Accuracy= 0.94531
Iter 35840, Minibatch Loss= 0.102239, Training Accuracy= 0.96875
Iter 37120, Minibatch Loss= 0.109119, Training Accuracy= 0.96094
Iter 38400, Minibatch Loss= 0.109292, Training Accuracy= 0.98438
Iter 39680, Minibatch Loss= 0.306919, Training Accuracy= 0.90625
Iter 40960, Minibatch Loss= 0.203778, Training Accuracy= 0.92188
Iter 42240, Minibatch Loss= 0.284107, Training Accuracy= 0.89844
Iter 43520, Minibatch Loss= 0.138126, Training Accuracy= 0.97656
Iter 44800, Minibatch Loss= 0.114046, Training Accuracy= 0.96094
Iter 46080, Minibatch Loss= 0.071983, Training Accuracy= 0.98438
Iter 47360, Minibatch Loss= 0.103043, Training Accuracy= 0.96094
Iter 48640, Minibatch Loss= 0.144249, Training Accuracy= 0.95312
Iter 49920, Minibatch Loss= 0.139357, Training Accuracy= 0.95312
Iter 51200, Minibatch Loss= 0.133279, Training Accuracy= 0.96875
Iter 52480, Minibatch Loss= 0.121659, Training Accuracy= 0.95312
Iter 53760, Minibatch Loss= 0.134236, Training Accuracy= 0.94531
Iter 55040, Minibatch Loss= 0.114976, Training Accuracy= 0.94531
Iter 56320, Minibatch Loss= 0.079626, Training Accuracy= 0.96875
Iter 57600, Minibatch Loss= 0.100246, Training Accuracy= 0.96875
Iter 58880, Minibatch Loss= 0.122625, Training Accuracy= 0.96094
Iter 60160, Minibatch Loss= 0.118390, Training Accuracy= 0.96094
Iter 61440, Minibatch Loss= 0.185835, Training Accuracy= 0.94531
Iter 62720, Minibatch Loss= 0.106119, Training Accuracy= 0.95312
Iter 64000, Minibatch Loss= 0.115121, Training Accuracy= 0.97656
Iter 65280, Minibatch Loss= 0.115135, Training Accuracy= 0.95312
Iter 66560, Minibatch Loss= 0.117334, Training Accuracy= 0.97656
Iter 67840, Minibatch Loss= 0.126765, Training Accuracy= 0.97656
Iter 69120, Minibatch Loss= 0.138394, Training Accuracy= 0.95312
Iter 70400, Minibatch Loss= 0.071930, Training Accuracy= 0.98438
Iter 71680, Minibatch Loss= 0.078006, Training Accuracy= 0.96094
Iter 72960, Minibatch Loss= 0.122468, Training Accuracy= 0.95312
Iter 74240, Minibatch Loss= 0.109707, Training Accuracy= 0.96094
Iter 75520, Minibatch Loss= 0.074332, Training Accuracy= 0.97656
Iter 76800, Minibatch Loss= 0.100400, Training Accuracy= 0.97656
Iter 78080, Minibatch Loss= 0.076505, Training Accuracy= 0.98438
Iter 79360, Minibatch Loss= 0.146538, Training Accuracy= 0.96094
Iter 80640, Minibatch Loss= 0.142548, Training Accuracy= 0.96094
Iter 81920, Minibatch Loss= 0.033582, Training Accuracy= 0.99219
Iter 83200, Minibatch Loss= 0.075734, Training Accuracy= 0.96094
Iter 84480, Minibatch Loss= 0.100758, Training Accuracy= 0.96094
Iter 85760, Minibatch Loss= 0.054176, Training Accuracy= 0.98438
Iter 87040, Minibatch Loss= 0.082799, Training Accuracy= 0.97656
Iter 88320, Minibatch Loss= 0.115938, Training Accuracy= 0.98438
Iter 89600, Minibatch Loss= 0.101315, Training Accuracy= 0.97656
Iter 90880, Minibatch Loss= 0.077859, Training Accuracy= 0.96875
Iter 92160, Minibatch Loss= 0.079088, Training Accuracy= 0.97656
Iter 93440, Minibatch Loss= 0.082847, Training Accuracy= 0.97656
Iter 94720, Minibatch Loss= 0.075818, Training Accuracy= 0.96875
Iter 96000, Minibatch Loss= 0.052176, Training Accuracy= 0.98438
Iter 97280, Minibatch Loss= 0.071600, Training Accuracy= 0.96875
Iter 98560, Minibatch Loss= 0.078695, Training Accuracy= 0.96875
Iter 99840, Minibatch Loss= 0.095374, Training Accuracy= 0.98438
Optimization Finished!
('Testing Accuracy:', 0.984375)

In [ ]:
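# The test set loaded at the start is never used above. A minimal sketch for
# scoring it and writing a Kaggle-style submission file (assumes the trained
# session above is still open; the 'ImageId'/'Label' column names follow the
# Digit Recognizer competition format):
predict_op = tf.argmax(pred, 1)
test_images = test.values.astype(np.float32) / 255.0
test_x = test_images.reshape((-1, n_steps, n_input))
predicted = []
# predict in batch-sized chunks to keep memory bounded
for start in range(0, len(test_x), batch_size):
    chunk = test_x[start:start + batch_size]
    predicted.extend(sess.run(predict_op, feed_dict={x: chunk}))
submission = pd.DataFrame({'ImageId': np.arange(1, len(predicted) + 1),
                           'Label': predicted})
submission.to_csv('submission.csv', index=False)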