In [1]:
from __future__ import print_function
import sys
sys.setrecursionlimit(99999)
import pdb

import numpy as np
np.random.seed(1337)  # for reproducibility

import keras

from keras.datasets import mnist, cifar10
from keras.models import Sequential, Graph
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.layers.normalization import BatchNormalization
import residual_blocks
#** A change was made in the residual_blocks.py file, the change is represented by #**
#** Without the change i was getting an unbound local error


Using Theano backend.

In [2]:
batch_size = 128
nb_classes = 2
nb_epoch = 15
#**The program was written for CIFAR, the dimensions for the dataset are (32,32)
#**For the MNSIT part the dataset has dimensions (28,28)
#**The program was padding (28,28) to (32,32) and was running it
#**The changes I made removed this padding step, and changed the dimensions  wherever necessary for running
#**the MNIST datset

#**More changes have been mentioned below

In [3]:
def compute_padding_length(length_before, stride, length_conv):
    ''' Assumption: you want the subsampled result has a length of floor(original_length/stride).
    '''
    N = length_before
    F = length_conv
    S = stride
    if S == F:
        return 0
    if S == 1:
        return (F-1)/2
    for P in range(S):
        if (N-F+2*P)/S + 1 == N/S:
            return P
    return None

In [4]:
def design_for_residual_blocks(num_channel_input=1):
    ''''''
    model = Sequential() # it's a CONTAINER, not MODEL
    # set numbers
    num_big_blocks = 3
    image_patch_sizes = [[3,3]]*num_big_blocks
    pool_sizes = [(2,2)]*num_big_blocks
    n_features = [128, 256, 512, 512, 1024]
    n_features_next = [256, 512, 512, 512, 1024]
    #height_input = 32
    #width_input = 32
    #**Changed the above values to 28 for the MNIST Dataset, 32 is for CIFAR 
    #**This change means we dont have to add padding for (28,28) to (32,32) in the get_residual_model() function
    height_input = 28
    width_input = 28
    for conv_idx in range(num_big_blocks):    
        n_feat_here = n_features[conv_idx]
        # residual block 0
        model.add(residual_blocks.building_residual_block(  (num_channel_input, height_input, width_input),
                                                            n_feat_here,
                                                            kernel_sizes=image_patch_sizes[conv_idx]
                                                            ))

        # residual block 1 (you can add it as you want (and your resources allow..))
        if False:
            model.add(residual_blocks.building_residual_block(  (n_feat_here, height_input, width_input),
                                                                n_feat_here,
                                                                kernel_sizes=image_patch_sizes[conv_idx]
                                                                ))
        
        # the last residual block N-1
        # the last one : pad zeros, subsamples, and increase #channels
        pad_height = compute_padding_length(height_input, pool_sizes[conv_idx][0], image_patch_sizes[conv_idx][0])
        pad_width = compute_padding_length(width_input, pool_sizes[conv_idx][1], image_patch_sizes[conv_idx][1])
        #**Made pad_height and width = 0, because the above computation of them was yielding a None result
        #**leading to an error in the program
        #**Error was - unsupported operand types for * - int and None type
        pad_height = 0
        pad_width = 0
        model.add(ZeroPadding2D(padding=(pad_height,pad_width))) 
        height_input += 2*pad_height
        width_input += 2*pad_width
        n_feat_next = n_features_next[conv_idx]
        model.add(residual_blocks.building_residual_block(  (n_feat_here, height_input, width_input),
                                                            n_feat_next,
                                                            kernel_sizes=image_patch_sizes[conv_idx],
                                                            is_subsample=True,
                                                            subsample=pool_sizes[conv_idx]
                                                            ))

        height_input, width_input = model.output_shape[2:]
        # width_input  = int(width_input/pool_sizes[conv_idx][1])
        num_channel_input = n_feat_next

    # Add average pooling at the end:
    print('Average pooling, from (%d,%d) to (1,1)' % (height_input, width_input))
    model.add(AveragePooling2D(pool_size=(height_input, width_input)))

    return model

In [5]:
def get_residual_model(is_mnist=True, img_channels=1, img_rows=28, img_cols=28):
    model = keras.models.Sequential()
    first_layer_channel = 128
    if is_mnist: # size to be changed to 32,32
        model.add(ZeroPadding2D((2,2), input_shape=(img_channels, img_rows, img_cols))) # resize (28,28)-->(32,32)
        # the first conv 
        model.add(Convolution2D(first_layer_channel, 3, 3, border_mode='same'))
    else:
        model.add(Convolution2D(first_layer_channel, 3, 3, border_mode='same', input_shape=(img_channels, img_rows, img_cols)))

    model.add(Activation('relu'))
    # [residual-based Conv layers]
    residual_blocks = design_for_residual_blocks(num_channel_input=first_layer_channel)
    model.add(residual_blocks)
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))
    # [Classifier]    
    model.add(Flatten())
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    # [END]
    return model

In [6]:
def build_resnet():

    
    is_mnist = True
    is_cifar10 = not is_mnist
    if is_mnist:
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        img_rows, img_cols = 28, 28
        img_channels = 1
        print(' == MNIST ==')
    else:
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        img_rows, img_cols = 32, 32
        img_channels = 3
        print(' == CIFAR10 ==')

    X_train = X_train.reshape(X_train.shape[0], img_channels, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], img_channels, img_rows, img_cols)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    #X_train = (X_train - np.mean(X_train))/np.std(X_train)
    #X_test = (X_test - np.mean(X_test))/np.std(X_test)
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')
    
    #Seed for reproducibilty
    np.random.seed(1338)

    #Selecting 6000 random examples from the test data
    test_rows = np.random.randint(0,X_test.shape[0],6000)
    X_test = X_test[test_rows]
    Y = y_test[test_rows]
    #Converting the output to binary classification(Six=1,Not Six=0)
    Y_test = Y == 6
    Y_test = Y_test.astype(int)

    #Selecting the 5918 examples where the output is 6
    X_six = X_train[y_train == 6]
    Y_six = y_train[y_train == 6]
    #Selecting the examples where the output is not 6
    X_not_six = X_train[y_train != 6]
    Y_not_six = y_train[y_train != 6]

    #Selecting 6000 random examples from the data that contains only the data where the output is not 6
    random_rows = np.random.randint(0,X_six.shape[0],6000)
    X_not_six = X_not_six[random_rows]
    Y_not_six = Y_not_six[random_rows]
    
    
    #Appending the data with output as 6 and data with output as not six
    X_train = np.append(X_six,X_not_six)
    #Reshaping the appended data to appropraite form
    X_train = X_train.reshape(X_six.shape[0] + X_not_six.shape[0], 1, img_rows, img_cols)
    #Appending the labels and converting the labels to binary classification(Six=1,Not Six=0)
    Y_labels = np.append(Y_six,Y_not_six)
    Y_train = Y_labels == 6 
    Y_train = Y_train.astype(int)
    
    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_test = np_utils.to_categorical(Y_test, nb_classes)
    #**Changing is_mnist to false so that it runs the else statement in the function, that is there will be no 
    #** addition of padding for (28,28) to (32,32) 
    model = get_residual_model(is_mnist=False, img_channels=img_channels, img_rows=img_rows, img_cols=img_cols)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, validation_data=(X_test, Y_test))#, callbacks=[best_model])
    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

In [7]:
%timeit -n1 -r1 build_resnet()


 == MNIST ==
X_train shape: (60000, 1, 28, 28)
60000 train samples
10000 test samples
   - New residual block with
      input shape: (128, 28, 28)
      kernel size: [3, 3]
        -- model was built.
   - New residual block with
      input shape: (128, 28, 28)
      kernel size: [3, 3]
      - Input channels: 128 ---> num feature maps on out: 256
      - with subsample: (2, 2)
        -- model was built.
   - New residual block with
      input shape: (256, 13, 13)
      kernel size: [3, 3]
        -- model was built.
   - New residual block with
      input shape: (256, 13, 13)
      kernel size: [3, 3]
      - Input channels: 256 ---> num feature maps on out: 512
      - with subsample: (2, 2)
        -- model was built.
   - New residual block with
      input shape: (512, 6, 6)
      kernel size: [3, 3]
        -- model was built.
   - New residual block with
      input shape: (512, 6, 6)
      kernel size: [3, 3]
      - with subsample: (2, 2)
        -- model was built.
Average pooling, from (2,2) to (1,1)
Train on 11918 samples, validate on 6000 samples
Epoch 1/15
11918/11918 [==============================] - 1599s - loss: 0.1172 - acc: 0.9640 - val_loss: 0.3572 - val_acc: 0.8845
Epoch 2/15
11918/11918 [==============================] - 1596s - loss: 0.0267 - acc: 0.9906 - val_loss: 0.0172 - val_acc: 0.9958
Epoch 3/15
11918/11918 [==============================] - 1594s - loss: 0.0138 - acc: 0.9950 - val_loss: 0.0431 - val_acc: 0.9873
Epoch 4/15
11918/11918 [==============================] - 1590s - loss: 0.0100 - acc: 0.9966 - val_loss: 0.0131 - val_acc: 0.9960
Epoch 5/15
11918/11918 [==============================] - 1590s - loss: 0.0093 - acc: 0.9970 - val_loss: 0.0112 - val_acc: 0.9978
Epoch 6/15
11918/11918 [==============================] - 1590s - loss: 0.0105 - acc: 0.9964 - val_loss: 0.0108 - val_acc: 0.9970
Epoch 7/15
11918/11918 [==============================] - 1590s - loss: 0.0044 - acc: 0.9986 - val_loss: 0.0101 - val_acc: 0.9968
Epoch 8/15
11918/11918 [==============================] - 1595s - loss: 0.0040 - acc: 0.9989 - val_loss: 0.0157 - val_acc: 0.9958
Epoch 9/15
11918/11918 [==============================] - 1599s - loss: 0.0033 - acc: 0.9990 - val_loss: 0.0275 - val_acc: 0.9930
Epoch 10/15
11918/11918 [==============================] - 1601s - loss: 0.0082 - acc: 0.9966 - val_loss: 0.0146 - val_acc: 0.9963
Epoch 11/15
11918/11918 [==============================] - 1596s - loss: 0.0049 - acc: 0.9984 - val_loss: 0.0292 - val_acc: 0.9922
Epoch 12/15
11918/11918 [==============================] - 1597s - loss: 0.0051 - acc: 0.9983 - val_loss: 0.0259 - val_acc: 0.9942
Epoch 13/15
11918/11918 [==============================] - 1599s - loss: 0.0286 - acc: 0.9920 - val_loss: 0.0210 - val_acc: 0.9930
Epoch 14/15
11918/11918 [==============================] - 1595s - loss: 0.0068 - acc: 0.9978 - val_loss: 0.0150 - val_acc: 0.9955
Epoch 15/15
11918/11918 [==============================] - 1595s - loss: 0.0069 - acc: 0.9977 - val_loss: 0.0121 - val_acc: 0.9962
/home/prajwal/anaconda3/lib/python3.5/site-packages/keras/models.py:429: UserWarning: The "show_accuracy" argument is deprecated, instead you should pass the "accuracy" metric to the model at compile time:
`model.compile(optimizer, loss, metrics=["accuracy"])`
  warnings.warn('The "show_accuracy" argument is deprecated, '
Test score: 0.0121019153625
Test accuracy: 0.996166666667
1 loop, best of 1: 6h 42min 30s per loop

In [ ]:


In [ ]:


In [ ]: