In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
from sklearn.datasets import load_boston
boston = load_boston()
print boston.keys()        # 'data', 'target', 'feature_names', 'DESCR'
print boston.data.shape    # 506 examples x 13 features
print boston.target.shape  # median home values, in $1000s
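Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2. On newer versions, one alternative is to fetch the same 506 x 13 dataset from OpenML (a sketch, assuming network access and a scikit-learn release with fetch_openml):

from sklearn.datasets import fetch_openml
boston = fetch_openml(name='boston', version=1)  # same data and target as load_boston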
In [3]:
import numpy as np
# sequential 70/20/10 split into train, dev, and test indices
inds = np.arange(len(boston.data))
n_train = int(len(boston.data) * 0.7)
n_dev = int(len(boston.data) * 0.2)
train_inds = inds[:n_train]
dev_inds = inds[n_train:n_train+n_dev]
test_inds = inds[n_train+n_dev:]
# reshape targets into column vectors to match the network's output shape
Xtrain, Ytrain = boston.data[train_inds], boston.target[train_inds].reshape((-1, 1))
Xdev, Ydev = boston.data[dev_inds], boston.target[dev_inds].reshape((-1, 1))
Xtest, Ytest = boston.data[test_inds], boston.target[test_inds].reshape((-1, 1))
print 'train', Xtrain.shape, Ytrain.shape
print 'dev', Xdev.shape, Ydev.shape
print 'test', Xtest.shape, Ytest.shape
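The split above is sequential, so any ordering in the dataset leaks into the three subsets. A minimal sketch of a shuffled split; the seed is a hypothetical choice for reproducibility, and the slicing code that follows is unchanged:

rng = np.random.RandomState(42)               # hypothetical seed, for reproducibility
inds = rng.permutation(len(boston.data))      # shuffle indices before slicing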
In [4]:
# make model: a 3-hidden-layer MLP with dropout, L1 weight regularization,
# and gradient clipping, trained with Adagrad on mean squared error
from theano import function, tensor as T
from pystacks.layers.container import Sequential, Parallel
from pystacks.layers.lookup import LookupTable
from pystacks.layers.common import *
from pystacks.regularizer import L1, L2
from pystacks.gradient_transformer import ClipGradient, ClipGradientNorm
from pystacks.optimizer import Adagrad
from pystacks.criteria import mean_squared_loss

def get_model(h1=100, h2=200, h3=100, dropout=0.5, reg=1e-5, clip=5.):
    Reg = L1
    net = Sequential([
        LinearLayer(len(boston.feature_names), h1, W_regularizer=Reg(reg), W_grad_transformer=ClipGradient(clip)),
        Dropout(dropout),
        ReLU(),
        LinearLayer(h1, h2, W_regularizer=Reg(reg), W_grad_transformer=ClipGradientNorm(clip)),
        Dropout(dropout),
        ReLU(),
        LinearLayer(h2, h3, W_regularizer=Reg(reg), W_grad_transformer=ClipGradientNorm(clip)),
        Dropout(dropout),
        ReLU(),
        LinearLayer(h3, 1, W_regularizer=Reg(reg), W_grad_transformer=ClipGradientNorm(clip)),  # scalar regression output
    ])

    X_sym = T.fmatrix()
    Y_sym = T.fmatrix()
    lr_sym = T.fscalar()

    pred_sym = net.forward(X_sym, train=True)  # train=True keeps dropout active
    debug_pred = function([X_sym], pred_sym)   # debugging helper: predictions with dropout on

    loss_sym = mean_squared_loss(pred_sym, Y_sym)
    optimizer = Adagrad()
    updates = net.grad_updates(loss=loss_sym, lr=lr_sym, optimizer=optimizer)
    f_train = function([X_sym, Y_sym, lr_sym], loss_sym, updates=updates, allow_input_downcast=True)
    # inference recompiles the forward pass with train=False so dropout is disabled
    f_pred = function([X_sym], net.forward(X_sym, train=False), allow_input_downcast=True)
    return net, f_train, f_pred
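The model mixes two gradient transformers. Presumably ClipGradient clamps each gradient component to [-clip, clip], while ClipGradientNorm rescales the whole gradient when its L2 norm exceeds clip; a numpy sketch of that distinction (illustrative only, not pystacks' implementation):

def clip_elementwise(g, c):
    return np.clip(g, -c, c)  # clamp each component independently

def clip_by_norm(g, c):
    norm = np.linalg.norm(g)
    return g * (c / norm) if norm > c else g  # rescale, preserving direction

Norm-based clipping keeps the gradient's direction intact, which is why it is often preferred for the deeper layers here.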
In [5]:
max_epoch = 300
lr = 3e-4
print_every = max_epoch // 100  # log every 3 epochs
import cPickle as pkl

def get_mse(pred, targ):
    return np.mean((pred - targ) ** 2)

best_loss = np.inf
net, f_train, f_pred = get_model(h1=500, h2=400, h3=300, dropout=0.5, reg=1e-2)
for epoch in xrange(max_epoch + 1):
    loss = f_train(Xtrain, Ytrain, lr)
    pred_dev = f_pred(Xdev)
    dev_loss = get_mse(pred_dev, Ydev)
    # checkpoint the model whenever the dev loss improves
    if dev_loss < best_loss:
        best_loss = dev_loss
        with open('best.model.pkl', 'wb') as f:
            pkl.dump(net, f)
    if epoch % print_every == 0:
        print 'epoch', epoch, 'train', loss, 'dev', dev_loss
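The loop above trains for a fixed number of epochs and simply checkpoints the best dev loss. A common refinement is patience-based early stopping; a sketch, with the patience value a hypothetical choice and the checkpointing omitted for brevity:

patience = 20    # hypothetical: epochs to wait for a dev improvement
since_best = 0
for epoch in xrange(max_epoch + 1):
    loss = f_train(Xtrain, Ytrain, lr)
    dev_loss = get_mse(f_pred(Xdev), Ydev)
    if dev_loss < best_loss:
        best_loss, since_best = dev_loss, 0
    else:
        since_best += 1
        if since_best > patience:
            break  # no dev improvement for `patience` consecutive epochs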
In [6]:
# reload the best checkpoint and rebuild the prediction function from it
with open('best.model.pkl', 'rb') as f:
    model = pkl.load(f)
X_sym = T.fmatrix()
f_pred = function([X_sym], model.forward(X_sym, train=False), allow_input_downcast=True)
In [7]:
pred_test = f_pred(Xtest)
print 'test mse'
print get_mse(Ytest, pred_test)
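Since the target is the median home value in thousands of dollars, the square root of the MSE is easier to interpret, giving the error in the target's own units:

print 'test rmse', np.sqrt(get_mse(Ytest, pred_test))  # error in $1000s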