In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from sklearn.datasets import load_boston
boston = load_boston()
print boston.keys()
print boston.data.shape
print boston.target.shape


['data', 'feature_names', 'DESCR', 'target']
(506, 13)
(506,)

In [3]:
import numpy as np
inds = np.arange(len(boston.data))
n_train = int(len(boston.data) * 0.7)
n_dev = int(len(boston.data) * 0.2)
train_inds = inds[:n_train]
dev_inds = inds[n_train:n_train+n_dev]
test_inds = inds[n_train+n_dev:]

Xtrain, Ytrain = boston.data[train_inds], boston.target[train_inds].reshape((-1, 1))
Xdev, Ydev = boston.data[dev_inds], boston.target[dev_inds].reshape((-1, 1))
Xtest, Ytest = boston.data[test_inds], boston.target[test_inds].reshape((-1, 1))

print 'train', Xtrain.shape, Ytrain.shape
print 'dev', Xdev.shape, Ydev.shape
print 'test', Xtest.shape, Ytest.shape


train (354, 13) (354, 1)
dev (101, 13) (101, 1)
test (51, 13) (51, 1)

In [4]:
# make model
from theano import function, tensor as T
from pystacks.layers.container import Sequential, Parallel
from pystacks.layers.lookup import LookupTable
from pystacks.layers.common import *
from pystacks.regularizer import L1, L2
from pystacks.gradient_transformer import ClipGradient, ClipGradientNorm

def get_model(h1=100, h2=200, h3=100, dropout=0.5, reg=1e-5, clip=5.):
    
    Reg = L1
    
    net = Sequential([
            LinearLayer(len(boston.feature_names), h1, W_regularizer=Reg(reg), W_grad_transformer=ClipGradient(clip)),
            Dropout(dropout),
            ReLU(),
            LinearLayer(h1, h2, W_regularizer=Reg(reg), W_grad_transformer=ClipGradientNorm(clip)),
            Dropout(dropout),
            ReLU(),
            LinearLayer(h2, h3, W_regularizer=Reg(reg), W_grad_transformer=ClipGradientNorm(clip)),
            Dropout(dropout),
            ReLU(),
            LinearLayer(h3, 1, W_regularizer=Reg(reg), W_grad_transformer=ClipGradientNorm(clip)),
        ])

    X_sym = T.fmatrix()
    pred_sym = net.forward(X_sym, train=True)
    debug_pred = function([X_sym], pred_sym)

    Y_sym = T.fmatrix()
    lr_sym = T.fscalar()

    from pystacks.optimizer import Adagrad
    from pystacks.criteria import mean_squared_loss

    optimizer = Adagrad()
    loss_sym = mean_squared_loss(pred_sym, Y_sym)
    updates = net.grad_updates(loss=loss_sym, lr=lr_sym, optimizer=optimizer)

    f_train = function([X_sym, Y_sym, lr_sym], loss_sym, updates=updates, allow_input_downcast=True)
    f_pred = function([X_sym], net.forward(X_sym, train=False), allow_input_downcast=True)
    
    return net, f_train, f_pred


Couldn't import dot_parser, loading of dot files will not be possible.
Using gpu device 0: GeForce GTX 760

In [5]:
max_epoch = 300
lr = 3e-4
print_every = max_epoch / 100

import cPickle as pkl

def get_mse(pred, targ):
    return np.mean((pred - targ)**2)

best_loss = np.inf
net, f_train, f_pred = get_model(h1=500, h2=400, h3=300, dropout=0.5, reg=1e-2)

for epoch in xrange(max_epoch+1):
    loss = f_train(Xtrain, Ytrain, lr)
    pred_dev = f_pred(Xdev)
    dev_loss = get_mse(pred_dev, Ydev)
    if dev_loss < best_loss:
        best_loss = dev_loss
        with open('best.model.pkl', 'wb') as f:
            pkl.dump(net, f)
    if epoch % print_every == 0:
        print 'epoch', epoch, 'train', loss, 'dev', dev_loss


epoch 0 train 434.596046983 dev 340.166674128
epoch 3 train 232.63081414 dev 159.289847677
epoch 6 train 149.758115282 dev 94.8160824377
epoch 9 train 108.711943071 dev 86.2974049298
epoch 12 train 76.1896156988 dev 112.748505677
epoch 15 train 68.2503359956 dev 151.550999374
epoch 18 train 55.7434488953 dev 171.939189626
epoch 21 train 52.2010422571 dev 169.512032464
epoch 24 train 57.2014755088 dev 155.652052966
epoch 27 train 56.9797213861 dev 145.771894136
epoch 30 train 56.356007062 dev 139.657485284
epoch 33 train 55.0284598921 dev 134.467941326
epoch 36 train 52.4094606259 dev 133.104426144
epoch 39 train 48.9798114734 dev 124.786754607
epoch 42 train 57.1406702922 dev 118.481073478
epoch 45 train 53.7252553778 dev 115.27148656
epoch 48 train 54.6755525841 dev 112.230122734
epoch 51 train 52.5091160305 dev 111.5499398
epoch 54 train 49.3994905582 dev 108.656482198
epoch 57 train 47.8660910969 dev 107.335346305
epoch 60 train 52.2353829151 dev 105.016446372
epoch 63 train 51.985475903 dev 102.340236472
epoch 66 train 59.6425424952 dev 99.3922745809
epoch 69 train 49.7622345001 dev 99.0910321758
epoch 72 train 51.6219919432 dev 96.983094591
epoch 75 train 53.4996431691 dev 96.0983827239
epoch 78 train 53.0402221754 dev 94.4754819009
epoch 81 train 53.1030913435 dev 93.8589091408
epoch 84 train 51.1084564461 dev 92.345487883
epoch 87 train 56.0603091762 dev 91.2228755599
epoch 90 train 46.6528191259 dev 90.795698921
epoch 93 train 52.6456572619 dev 89.8245415798
epoch 96 train 50.2376037874 dev 89.3352585188
epoch 99 train 52.9541548873 dev 89.2970624381
epoch 102 train 50.3696844955 dev 88.1978671022
epoch 105 train 48.5240299084 dev 87.7343402203
epoch 108 train 52.2477150969 dev 87.1689374966
epoch 111 train 44.4191674469 dev 87.1837412874
epoch 114 train 53.2273586368 dev 86.3255991411
epoch 117 train 46.4459640295 dev 86.1624404763
epoch 120 train 50.5027406699 dev 86.1624520808
epoch 123 train 44.5208042722 dev 85.894976563
epoch 126 train 45.3712138757 dev 85.8519007785
epoch 129 train 45.7500886403 dev 85.7009590715
epoch 132 train 47.3248283729 dev 85.4925788345
epoch 135 train 45.2299545714 dev 85.1854778802
epoch 138 train 48.7729931653 dev 84.9969336403
epoch 141 train 46.7325026841 dev 84.8608240019
epoch 144 train 45.5675073659 dev 84.5156287746
epoch 147 train 42.8818001566 dev 84.5485779037
epoch 150 train 45.7526522151 dev 84.5262361032
epoch 153 train 45.5171100898 dev 84.4172897161
epoch 156 train 45.7617476006 dev 84.3416976742
epoch 159 train 46.6847845869 dev 84.3124957651
epoch 162 train 45.5865277865 dev 84.3148160223
epoch 165 train 44.6369944367 dev 84.282686241
epoch 168 train 43.2137525248 dev 84.2738483581
epoch 171 train 43.1140082297 dev 84.315270683
epoch 174 train 46.9609658211 dev 84.3897326915
epoch 177 train 47.642510393 dev 84.4413849783
epoch 180 train 46.1533837225 dev 84.5372491564
epoch 183 train 49.4577291046 dev 84.5075405072
epoch 186 train 50.4402600696 dev 84.5089423309
epoch 189 train 47.2657551881 dev 84.6553259854
epoch 192 train 44.963951055 dev 84.686407983
epoch 195 train 48.9769431733 dev 84.724873273
epoch 198 train 45.592741716 dev 84.7966884108
epoch 201 train 42.7849108913 dev 84.9291360678
epoch 204 train 41.7891285449 dev 85.2179077989
epoch 207 train 45.3900997321 dev 85.244491297
epoch 210 train 43.4741553359 dev 85.4341277856
epoch 213 train 42.9392163965 dev 85.5532199206
epoch 216 train 44.1639540415 dev 85.6933927085
epoch 219 train 44.3392505677 dev 85.7119015952
epoch 222 train 43.2612311494 dev 85.6203466062
epoch 225 train 45.3698659081 dev 85.8103267939
epoch 228 train 43.3993920108 dev 86.0765245338
epoch 231 train 43.8407409669 dev 85.845669055
epoch 234 train 42.9217832697 dev 85.9454561326
epoch 237 train 44.6679683944 dev 85.9878038568
epoch 240 train 43.7234366957 dev 86.3716574687
epoch 243 train 51.9323359164 dev 86.6527742675
epoch 246 train 41.7293455407 dev 86.8254574038
epoch 249 train 41.6541788682 dev 86.9463769823
epoch 252 train 45.7547525842 dev 87.4661610245
epoch 255 train 44.7633514285 dev 87.5196693962
epoch 258 train 45.702977851 dev 87.7097467357
epoch 261 train 47.1894346205 dev 87.8284754099
epoch 264 train 44.2237815313 dev 87.8864174376
epoch 267 train 45.5109433384 dev 87.6795024087
epoch 270 train 45.156750871 dev 87.8143603668
epoch 273 train 41.4581959374 dev 87.9169542392
epoch 276 train 43.1933099172 dev 88.1473465859
epoch 279 train 39.4383717067 dev 88.5298968056
epoch 282 train 42.9491077427 dev 88.9697590026
epoch 285 train 44.6074780818 dev 89.5039867517
epoch 288 train 44.0753476507 dev 89.1845717611
epoch 291 train 48.6509989181 dev 89.2787168631
epoch 294 train 43.6946960927 dev 89.5779025582
epoch 297 train 47.552475732 dev 89.8644722248
epoch 300 train 47.286336561 dev 89.7833002326

In [6]:
with open('best.model.pkl') as f:
    model = pkl.load(f)
    
X_sym = T.fmatrix()
pred_sym = net.forward(X_sym, train=True)
f_pred = function([X_sym], net.forward(X_sym, train=False), allow_input_downcast=True)

In [7]:
pred_test = f_pred(Xtest)
print 'test mse'
print get_mse(Ytest, pred_test)


test mse
32.8177580638

In [ ]: