In [1]:
from __future__ import print_function
import sys
import os
import time
import string
import numpy as np
import theano
import theano.tensor as T
sys.path.append('..')
import gp
import gp.nets as nets
import gp.nets.BatchNormLayer as BatchNormLayer
import lasagne
sys.setrecursionlimit(10000)
In [2]:
%load_ext autoreload
%autoreload 2
In [2]:
# helper function for projection_b
def ceildiv(a, b):
    return -(-a // b)

def build_cnn(input_var=None, n=1, num_filters=8, cudnn='no'):
    import lasagne  # for some odd reason the global import is not visible here; please open a PR/issue if you know why
    # Setting up layers
    if cudnn == 'yes':
        import lasagne.layers.dnn
        conv = lasagne.layers.dnn.Conv2DDNNLayer  # cuDNN
    else:
        conv = lasagne.layers.Conv2DLayer
    dropout = lasagne.layers.DropoutLayer
    nonlin = lasagne.nonlinearities.rectify
    nonlin_layer = lasagne.layers.NonlinearityLayer
    sumlayer = lasagne.layers.ElemwiseSumLayer
    # batchnorm = BatchNormLayer.BatchNormLayer
    batchnorm = lasagne.layers.BatchNormLayer
    # Projection type used when reducing height/width and increasing dimensions.
    # Default is 'B' since B performs slightly better,
    # and A requires a newer version of Lasagne with ExpressionLayer.
    projection_type = 'B'
    if projection_type == 'A':
        expression = lasagne.layers.ExpressionLayer
        pad = lasagne.layers.PadLayer

        # option A for projection as described in the paper
        # (should perform slightly worse than B)
        def projection(l_inp):
            n_filters = l_inp.output_shape[1] * 2
            l = expression(l_inp, lambda X: X[:, :, ::2, ::2],
                           lambda s: (s[0], s[1], ceildiv(s[2], 2), ceildiv(s[3], 2)))
            l = pad(l, [n_filters // 4, 0, 0], batch_ndim=1)
            return l

    if projection_type == 'B':
        # option B for projection as described in the paper
        def projection(l_inp):
            # twice the normal number of channels when projecting!
            n_filters = l_inp.output_shape[1] * 2
            l = conv(l_inp, num_filters=n_filters, filter_size=(1, 1),
                     stride=(2, 2), nonlinearity=None, pad='same', b=None)
            l = batchnorm(l)
            return l
    # helper function to handle filters/strides when increasing dims
    def filters_increase_dims(l, increase_dims):
        in_num_filters = l.output_shape[1]
        if increase_dims:
            first_stride = (2, 2)
            out_num_filters = in_num_filters * 2
        else:
            first_stride = (1, 1)
            out_num_filters = in_num_filters
        return out_num_filters, first_stride

    # block as described and used in cifar in the original paper:
    # http://arxiv.org/abs/1512.03385
    def res_block_v1(l_inp, nonlinearity=nonlin, increase_dim=False):
        # first figure filters/strides
        n_filters, first_stride = filters_increase_dims(l_inp, increase_dim)
        # conv -> BN -> nonlin -> conv -> BN -> sum -> nonlin
        l = conv(l_inp, num_filters=n_filters, filter_size=(3, 3),
                 stride=first_stride, nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        l = nonlin_layer(l, nonlinearity=nonlin)
        # l = dropout(l, p=.2)
        # print('adding dropout')
        l = conv(l, num_filters=n_filters, filter_size=(3, 3),
                 stride=(1, 1), nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        if increase_dim:
            # Use projection (A, B) as described in paper
            p = projection(l_inp)
        else:
            # Identity shortcut
            p = l_inp
        l = sumlayer([l, p])
        l = nonlin_layer(l, nonlinearity=nonlin)
        return l

    # block as described in second paper on the subject (by same authors):
    # http://arxiv.org/abs/1603.05027
    def res_block_v2(l_inp, nonlinearity=nonlin, increase_dim=False):
        # first figure filters/strides
        n_filters, first_stride = filters_increase_dims(l_inp, increase_dim)
        # BN -> nonlin -> conv -> BN -> nonlin -> conv -> sum
        l = batchnorm(l_inp)
        l = nonlin_layer(l, nonlinearity=nonlin)
        l = conv(l, num_filters=n_filters, filter_size=(3, 3),
                 stride=first_stride, nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        l = nonlin_layer(l, nonlinearity=nonlin)
        l = conv(l, num_filters=n_filters, filter_size=(3, 3),
                 stride=(1, 1), nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        if increase_dim:
            # Use projection (A, B) as described in paper
            p = projection(l_inp)
        else:
            # Identity shortcut
            p = l_inp
        l = sumlayer([l, p])
        return l
    def bottleneck_block(l_inp, nonlinearity=nonlin, increase_dim=False):
        # first figure filters/strides
        n_filters, first_stride = filters_increase_dims(l_inp, increase_dim)
        # conv -> BN -> nonlin -> conv -> BN -> nonlin -> conv -> BN -> sum
        # -> nonlin
        # first make the bottleneck: scale down the filters!
        scale = 4  # as per bottleneck architecture used in paper
        scaled_filters = n_filters // scale
        l = conv(l_inp, num_filters=scaled_filters, filter_size=(1, 1),
                 stride=first_stride, nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        l = nonlin_layer(l, nonlinearity=nonlin)
        l = conv(l, num_filters=scaled_filters, filter_size=(3, 3),
                 stride=(1, 1), nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        l = nonlin_layer(l, nonlinearity=nonlin)
        l = conv(l, num_filters=n_filters, filter_size=(1, 1),
                 stride=(1, 1), nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        if increase_dim:
            # Use projection (A, B) as described in paper
            p = projection(l_inp)
        else:
            # Identity shortcut
            p = l_inp
        l = sumlayer([l, p])
        l = nonlin_layer(l, nonlinearity=nonlin)
        return l

    # Bottleneck block with more efficiency (the Reddit post with Kaiming He's response):
    # https://www.reddit.com/r/MachineLearning/comments/3ywi6x/deep_residual_learning_the_bottleneck/
    def bottleneck_block_fast(l_inp, nonlinearity=nonlin, increase_dim=False):
        # first figure filters/strides
        n_filters, last_stride = filters_increase_dims(l_inp, increase_dim)
        # conv -> BN -> nonlin -> conv -> BN -> nonlin -> conv -> BN -> sum
        # -> nonlin
        # first make the bottleneck: scale down the filters!
        scale = 4  # as per bottleneck architecture used in paper
        scaled_filters = n_filters // scale
        l = conv(l_inp, num_filters=scaled_filters, filter_size=(1, 1),
                 stride=(1, 1), nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        l = nonlin_layer(l, nonlinearity=nonlin)
        l = conv(l, num_filters=scaled_filters, filter_size=(3, 3),
                 stride=(1, 1), nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        l = batchnorm(l)
        l = nonlin_layer(l, nonlinearity=nonlin)
        l = conv(l, num_filters=n_filters, filter_size=(1, 1),
                 stride=last_stride, nonlinearity=None, pad='same',
                 W=lasagne.init.HeNormal(gain='relu'))
        if increase_dim:
            # Use projection (A, B) as described in paper
            p = projection(l_inp)
        else:
            # Identity shortcut
            p = l_inp
        l = sumlayer([l, p])
        l = nonlin_layer(l, nonlinearity=nonlin)
        return l

    res_block = res_block_v1

    # Stacks the residual blocks; makes it easy to scale the architecture with the integer n
    def blockstack(l, n, nonlinearity=nonlin):
        print('NNN', n)
        for _ in range(n):
            print('new')
            l = res_block(l, nonlinearity=nonlin)
        return l
    # Building the network
    l_in = lasagne.layers.InputLayer(shape=(None, 4, 75, 75),
                                     input_var=input_var)
    # First layer: just a plain conv layer
    l1 = conv(l_in, num_filters=num_filters, stride=(1, 1),
              filter_size=(3, 3), nonlinearity=None, pad='same')
    l1 = batchnorm(l1)
    l1 = nonlin_layer(l1, nonlinearity=nonlin)
    # Stacking residual blocks and increasing dims (while reducing spatial size)
    # l1_bs = blockstack(l1, n=n)
    # l1_id = res_block(l1_bs, increase_dim=True)
    # l2_bs = blockstack(l1_id, n=n)
    # l2_id = res_block(l2_bs, increase_dim=True)
    # l3_bs = blockstack(l2_id, n=n)
    l3_bs = blockstack(l1, n=n)
    l3_do = dropout(l3_bs, p=.5)
    # And, finally, the 2-unit softmax output layer:
    network = lasagne.layers.DenseLayer(
        l3_do,
        # l1,
        num_units=2,
        nonlinearity=lasagne.nonlinearities.softmax)
    return network


# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        Xb = inputs[excerpt]
        yb = targets[excerpt]
        # center the inputs around zero
        Xb = Xb - .5
        # augmentation: rotate each sample by a random multiple of 90 degrees
        k_s = np.array([0, 1, 2, 3], dtype=np.uint8)
        for i in range(len(Xb)):
            k = np.random.choice(k_s)
            for j in range(Xb.shape[1]):
                # rotate every channel of sample i by the same k
                Xb[i][j] = np.rot90(Xb[i][j], k).copy()
        yield Xb, yb
        # yield inputs[excerpt], targets[excerpt]
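As the comment above suggests, very large datasets can be streamed from disk instead of loaded into RAM. The cell below is a small self-contained sketch of that idea (not needed for the patch data used here): it writes a tiny temporary .npy file and reopens it memory-mapped.
In [ ]:
# minimal sketch of np.load(..., mmap_mode='r') for data that does not fit in memory;
# the temporary file and its random contents are placeholders, not part of this experiment
import tempfile
tmp_path = os.path.join(tempfile.gettempdir(), 'mmap_demo.npy')
np.save(tmp_path, np.random.rand(16, 4, 75, 75).astype(np.float32))
X_mm = np.load(tmp_path, mmap_mode='r')  # array stays on disk, slices are read lazily
print(X_mm.shape, X_mm.dtype)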
In [3]:
PATCH_PATH = 'ipmlb'
X_train, y_train, X_test, y_test = gp.Patch.load_rgba(PATCH_PATH)
In [4]:
# hold out the last quarter of the training set for validation
X_val = X_train[-X_train.shape[0] // 4:]
y_val = y_train[-X_train.shape[0] // 4:]
In [5]:
X_train2 = X_train[:-X_train.shape[0] // 4]
y_train2 = y_train[:-X_train.shape[0] // 4]
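A quick added sanity check that the validation split and the remaining training split together cover the original training set:
In [ ]:
# sanity check on the split sizes
print('train:', X_train2.shape, 'val:', X_val.shape, 'test:', X_test.shape)
assert X_train2.shape[0] + X_val.shape[0] == X_train.shape[0]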
In [6]:
n=2
num_filters=32
num_epochs=200
cudnn='yes'
print(n)
In [13]:
# Prepare Theano variables for inputs and targets
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
# Create neural network model (depending on first command line parameter)
print("Building model and compiling functions...")
network = build_cnn(input_var, n, num_filters, cudnn)
all_layers = lasagne.layers.get_all_layers(network)
num_params = lasagne.layers.count_params(network)
num_conv = 0
num_nonlin = 0
num_input = 0
num_batchnorm = 0
num_elemsum = 0
num_dense = 0
num_unknown = 0
print(" layer output shapes:")
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    print(" %s %s" % (name, lasagne.layers.get_output_shape(layer)))
    if "Conv2D" in name:
        num_conv += 1
    elif "NonlinearityLayer" in name:
        num_nonlin += 1
    elif "InputLayer" in name:
        num_input += 1
    elif "BatchNormLayer" in name:
        num_batchnorm += 1
    elif "ElemwiseSumLayer" in name:
        num_elemsum += 1
    elif "DenseLayer" in name:
        num_dense += 1
    else:
        num_unknown += 1
print(" no. of InputLayers: %d" % num_input)
print(" no. of Conv2DLayers: %d" % num_conv)
print(" no. of BatchNormLayers: %d" % num_batchnorm)
print(" no. of NonlinearityLayers: %d" % num_nonlin)
print(" no. of DenseLayers: %d" % num_dense)
print(" no. of ElemwiseSumLayers: %d" % num_elemsum)
print(" no. of Unknown Layers: %d" % num_unknown)
print(" total no. of layers: %d" % len(all_layers))
print(" no. of parameters: %d" % num_params)
# Create a loss expression for training, i.e., a scalar objective we want
# to minimize (for our multi-class problem, it is the cross-entropy loss):
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.
# Create update expressions for training, i.e., how to modify the
# parameters at each training step. Here, we'll use Stochastic Gradient
# Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
params = lasagne.layers.get_all_params(network, trainable=True)
# several learning rates, for a low initial learning rate and
# learning rate annealing (keys are epoch numbers)
# learning_rate_schedule = {
#     0: 0.0001,  # low initial learning rate as described in paper
#     2: 0.01,
#     100: 0.001,
#     150: 0.0001
# }
learning_rate = theano.shared(np.float32(0.03))
momentum = theano.shared(np.float32(0.9))
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=learning_rate, momentum=momentum)
# Create a loss expression for validation/testing. The crucial difference
# here is that we do a deterministic forward pass through the network,
# disabling dropout layers.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                        target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                  dtype=theano.config.floatX)
# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], loss, updates=updates)
# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
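Before the full training run it can help to push one small mini-batch through the compiled functions; the cell below is an optional added smoke test (it performs a single parameter update), assuming the arrays loaded earlier are still in memory.
In [ ]:
# optional smoke test: one mini-batch through train_fn and val_fn
Xb_check = X_train2[:8].astype(np.float32) - .5
yb_check = y_train2[:8].astype(np.int32)
print('training loss on one batch:', train_fn(Xb_check, yb_check))
print('validation loss/accuracy:  ', val_fn(Xb_check, yb_check))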
In [15]:
ls = np.linspace(0.001, 0.00001, 500)
ms = np.linspace(0.9,0.999, 500)
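These two arrays act as per-epoch schedules: the learning rate decays linearly from 1e-3 to 1e-5 while the momentum ramps from 0.9 to 0.999. A small added check of the endpoints:
In [ ]:
# endpoints of the per-epoch learning-rate and momentum schedules
print('learning rate: %.6f -> %.6f' % (ls[0], ls[-1]))
print('momentum:      %.4f -> %.4f' % (ms[0], ms[-1]))
print('schedule length:', len(ls), '(only the first num_epochs =', num_epochs, 'entries are used)')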
In [12]:
ms
Out[12]:
In [16]:
# Finally, launch the training loop.
print("Starting training...")
t_loss = []
v_loss = []
# We iterate over epochs:
for epoch in range(num_epochs):
    # if epoch in learning_rate_schedule:
    #     lr = np.float32(learning_rate_schedule[epoch])
    #     print(" setting learning rate to %.7f" % lr)
    learning_rate.set_value(np.float32(ls[epoch]))
    momentum.set_value(np.float32(ms[epoch]))
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train2, y_train2, 128, shuffle=False):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1
    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, 128, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1
    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
    print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print(" validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
    t_loss.append(train_err / train_batches)
    v_loss.append(val_err / val_batches)
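Since the loop above appends the per-epoch averages to t_loss and v_loss, the curves can be plotted directly; a minimal sketch, assuming matplotlib is installed in this environment:
In [ ]:
# plot the recorded training/validation loss curves
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(t_loss, label='train loss')
plt.plot(v_loss, label='valid loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='best')
plt.show()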
In [17]:
# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 128, shuffle=False):
    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results:")
print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print(" test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))
In [18]:
np.savez('/home/d/resnet3_71.npz', *lasagne.layers.get_all_param_values(network))
In [84]:
network.input_layer
Out[84]:
In [13]:
import matplotlib.pyplot as plt

# plots the loss curves from a nolearn-style net with a train_history_ attribute
# (the Theano/Lasagne loop above records plain t_loss/v_loss lists instead)
def plot_loss(net):
    train_loss = [row['train_loss'] for row in net.train_history_]
    valid_loss = [row['valid_loss'] for row in net.train_history_]
    plt.plot(train_loss, label='train loss')
    plt.plot(valid_loss, label='valid loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='best')
    return plt
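plot_loss expects a nolearn-style object carrying a train_history_ list of dicts, which the manual training loop in this notebook does not produce. The cell below is an illustrative adapter (the _HistoryOnly class is introduced here, not part of the original code) that wraps the recorded t_loss/v_loss lists so plot_loss can be reused:
In [ ]:
# wrap the recorded loss lists in a minimal nolearn-style history object
class _HistoryOnly(object):
    def __init__(self, t_loss, v_loss):
        self.train_history_ = [{'train_loss': t, 'valid_loss': v}
                               for t, v in zip(t_loss, v_loss)]

plot_loss(_HistoryOnly(t_loss, v_loss)).show()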
# Finally, launch the training loop.
print("Starting training...")
# We iterate over epochs:
# Note: this cell assumes a learning_rate_schedule dict (epoch -> learning rate),
# like the one commented out above, has been defined in the session.
for epoch in range(200, 200 + num_epochs):
    if epoch in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[epoch])
        print(" setting learning rate to %.7f" % lr)
        learning_rate.set_value(lr)
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train2, y_train2, 128, shuffle=False):
        inputs, targets = batch
        train_err += train_fn(inputs, targets)
        train_batches += 1
    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, 128, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1
    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
    print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print(" validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
In [17]:
learning_rate_schedule
Out[17]:
In [22]:
epoch
Out[22]:
In [24]:
train_err / train_batches
Out[24]:
In [25]:
# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 128, shuffle=False):
    inputs, targets = batch
    err, acc = val_fn(inputs, targets)
    test_err += err
    test_acc += acc
    test_batches += 1
print("Final results:")
print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print(" test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))
In [27]:
val_acc / val_batches * 100
Out[27]:
In [28]:
val_err / val_batches
Out[28]:
In [29]:
np.savez('/home/d/resnet2_after328.npz', *lasagne.layers.get_all_param_values(network))
In [30]:
# Optionally, you could now dump the network weights to a file like this:
# np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
#
# And load them again later on like this:
# with np.load('model.npz') as f:
#     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
# lasagne.layers.set_all_param_values(network, param_values)
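Applied to the checkpoint saved earlier in this notebook (/home/d/resnet3_71.npz), the recipe above would look like the following; the path obviously has to exist on the machine running the notebook:
In [ ]:
# restore the parameters saved earlier into the current network
with np.load('/home/d/resnet3_71.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(network, param_values)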
In [19]:
pred_fn = theano.function([input_var, target_var], [test_prediction, test_loss, test_acc])
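pred_fn returns the softmax outputs together with the deterministic loss and accuracy. A short added example, assuming X_val/y_val are still in memory, that inspects per-sample class probabilities for one validation batch (the inputs are shifted by -0.5 to match iterate_minibatches):
In [ ]:
# per-sample class probabilities for one validation batch
probs, batch_loss, batch_acc = pred_fn(X_val[:16].astype(np.float32) - .5,
                                       y_val[:16].astype(np.int32))
print('probabilities shape:', probs.shape)   # (16, 2)
print('predicted classes:', probs.argmax(axis=1))
print('loss %.4f, accuracy %.2f%%' % (batch_loss, batch_acc * 100))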
In [33]:
test_prediction = lasagne.layers.get_output(network, deterministic=True)
In [40]:
target_var.shape.eval()