In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

Titanic Kaggle

In this example, we show how to use pystacks to tackle a Kaggle competition from scratch. Doing well on this competition requires significantly more effort than shown here, particularly with respect to inferring missing features (which we impute naively: the column mean for continuous features and the most common value for discrete ones). Nevertheless, this example demonstrates how to feed data into pystacks.

Description from Kaggle:

VARIABLE DESCRIPTIONS:
survival        Survival
                (0 = No; 1 = Yes)
pclass          Passenger Class
                (1 = 1st; 2 = 2nd; 3 = 3rd)
name            Name
sex             Sex
age             Age
sibsp           Number of Siblings/Spouses Aboard
parch           Number of Parents/Children Aboard
ticket          Ticket Number
fare            Passenger Fare
cabin           Cabin
embarked        Port of Embarkation
                (C = Cherbourg; Q = Queenstown; S = Southampton)

SPECIAL NOTES:
Pclass is a proxy for socio-economic status (SES)
 1st ~ Upper; 2nd ~ Middle; 3rd ~ Lower

Age is in Years; Fractional if Age less than One (1)
 If the Age is Estimated, it is in the form xx.5

With respect to the family relation variables (i.e. sibsp and parch)
some relations were ignored.  The following are the definitions used
for sibsp and parch.

Sibling:  Brother, Sister, Stepbrother, or Stepsister of Passenger Aboard Titanic
Spouse:   Husband or Wife of Passenger Aboard Titanic (Mistresses and Fiances Ignored)
Parent:   Mother or Father of Passenger Aboard Titanic
Child:    Son, Daughter, Stepson, or Stepdaughter of Passenger Aboard Titanic

Other family relatives excluded from this study include cousins,
nephews/nieces, aunts/uncles, and in-laws.  Some children travelled
only with a nanny, therefore parch=0 for them.  As well, some
travelled with very close friends or neighbors in a village, however,
the definitions do not support such relations.

In [2]:
# we will use everything as features except for the passenger id, name, ticket (number), fare, and port of embarkation
continuous_features = ['Age', 'SibSp', 'Parch']

# furthermore, we will learn representations for discrete features
discrete_features = ['Pclass', 'Sex', 'Cabin']

In [3]:
import csv
print 'train file'
!head ./train.csv
print
print 'test file'
!head ./test.csv


train file
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S

test file
PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S
894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q
895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S
897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S
898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q
899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S
900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C

In [4]:
# we're going to do a hack here where we discretize cabin locations down to the deck letter
# (the first character of the first cabin listed)

def convert_data(fname):
    Xcont, Xdis, Y = [], [], []
    with open(fname) as f:
        reader = csv.reader(f)
        header = reader.next()
        for i, row in enumerate(reader):
            if 'Survived' in header:
                Y.append(row[header.index('Survived')])
            cabin = row[header.index('Cabin')]
            if cabin:
                cabin = cabin.split(' ')[0]
                cabin = cabin[0]
            row[header.index('Cabin')] = cabin
            Xcont.append([row[header.index(n)] for n in continuous_features])
            Xdis.append([row[header.index(n)] if row[header.index(n)] else 'MISSING' for n in discrete_features])
    if len(Y):
        return Xcont, Xdis, Y 
    else: 
        return Xcont, Xdis

train_cont, train_dis, train_Y = convert_data('train.csv')
test_cont, test_dis = convert_data('test.csv')
print train_cont[:5]
print train_dis[:5]


[['22', '1', '0'], ['38', '1', '0'], ['26', '0', '0'], ['35', '1', '0'], ['35', '0', '0']]
[['3', 'male', 'MISSING'], ['1', 'female', 'C'], ['3', 'female', 'MISSING'], ['1', 'female', 'C'], ['3', 'male', 'MISSING']]
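
To see concretely what the cabin hack in convert_data does, here is a minimal sketch (with a made-up multi-cabin string; some passengers list several cabins) of the same transformation:

In [ ]:
# keep only the deck letter: first character of the first cabin listed
cabin = 'B57 B59 B63 B66'
print cabin.split(' ')[0][0]  # prints 'B'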

In [5]:
import numpy as np
np.random.seed(0)
# unfortunately, this dataset has missing values, hence we'll take a very simple approach and fill in
# the continuous features with the column mean and the discrete features with the most common value

def back_fill_cont(cont, means):
    # entries of means start as None and are filled in from the training data,
    # then reused unchanged when this function is called on the test set
    cols = zip(*cont)
    for i, col in enumerate(cols):
        valued = [float(v) for v in col if v != '']
        if means[i] is None:
            means[i] = np.mean(valued)
        col = list(col)
        for j, val in enumerate(col):
            if val == '':
                val = means[i]
            col[j] = float(val)
        cols[i] = col
    cols = zip(*cols)
    return np.array(cols)

means = [None for n in continuous_features]
Xtrain_cont = back_fill_cont(train_cont, means)
Xtest_cont = back_fill_cont(test_cont, means)

print 'backfilled'
print Xtrain_cont[:5]

def standardize(X, means=None, stds=None):
    if means is None:
        means = X.mean(axis=0)
    if stds is None:
        stds = X.std(axis=0)
    return (X-means[np.newaxis, :]) / stds[np.newaxis, :], means, stds

Xtrain_cont, means, stds = standardize(Xtrain_cont)
Xtest_cont, _, _ = standardize(Xtest_cont, means, stds)

print 'standardized'
print Xtrain_cont[:5]

Y = np.array(train_Y)


backfilled
[[ 22.   1.   0.]
 [ 38.   1.   0.]
 [ 26.   0.   0.]
 [ 35.   1.   0.]
 [ 35.   0.   0.]]
standardized
[[-0.5924806   0.43279337 -0.47367361]
 [ 0.63878901  0.43279337 -0.47367361]
 [-0.2846632  -0.4745452  -0.47367361]
 [ 0.40792596  0.43279337 -0.47367361]
 [ 0.40792596 -0.4745452  -0.47367361]]
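
As a quick sanity check (a sketch, not part of the original pipeline), the standardized training columns should now have mean roughly 0 and standard deviation roughly 1; the test columns will be close but not exact, since they are scaled with the training statistics:

In [ ]:
print np.allclose(Xtrain_cont.mean(axis=0), 0.0, atol=1e-6)  # True
print np.allclose(Xtrain_cont.std(axis=0), 1.0, atol=1e-6)   # True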

In [6]:
from collections import Counter
from pystacks.utils.text.vocab import Vocab

def back_fill_dis(dis, most_commons):
    # like back_fill_cont: most_commons is computed from the training data and reused for the test set
    cols = zip(*dis)
    for i, col in enumerate(cols):
        valued = [v for v in col if v != '']
        if most_commons[i] is None:
            most_commons[i] = Counter(valued).most_common()[0][0]
        col = list(col)
        for j, val in enumerate(col):
            if val == '':
                val = most_commons[i]
            col[j] = val
        cols[i] = col
    cols = zip(*cols)
    return cols

most_commons = [None for n in discrete_features]
Xtrain_dis = back_fill_dis(train_dis, most_commons)
Xtest_dis = back_fill_dis(test_dis, most_commons)

print 'backfilled'
print Xtrain_dis[:5]

def numericalize(X, vocabs, add=False):
    cols = zip(*X)
    for i, col in enumerate(cols):
        vocab = vocabs[i]
        col = [vocab.add(val) for val in col] if add else [vocab[val] for val in col]
        cols[i] = col
    cols = zip(*cols)
    return np.array(cols)

vocabs = [Vocab() for f in discrete_features]
Xtrain_dis = numericalize(Xtrain_dis, vocabs, add=True)
Xtest_dis = numericalize(Xtest_dis, vocabs)

vocabs = {name:v for name, v in zip(discrete_features, vocabs)}

print 'vocab_size'
print [(name, len(v)) for name, v in vocabs.items()]
print 'numericalized'
print Xtrain_dis[:5]


backfilled
[('3', 'male', 'MISSING'), ('1', 'female', 'C'), ('3', 'female', 'MISSING'), ('1', 'female', 'C'), ('3', 'male', 'MISSING')]
vocab_size
[('Cabin', 9), ('Pclass', 3), ('Sex', 2)]
numericalized
[[0 0 0]
 [1 1 1]
 [0 1 0]
 [1 1 1]
 [0 0 0]]
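
The numericalize step simply maps each discrete value to a consecutive integer id. A minimal sketch of the same idea, using a plain dict as a stand-in for pystacks' Vocab (whose actual API is used above), looks like this:

In [ ]:
toy_vocab = {}

def toy_add(val):
    # assign the next free id to unseen values, analogous to Vocab.add
    if val not in toy_vocab:
        toy_vocab[val] = len(toy_vocab)
    return toy_vocab[val]

print [toy_add(v) for v in ['male', 'female', 'male']]  # [0, 1, 0]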

In [7]:
# finally, we randomly split the training data into train and dev sets for validation
def split_train(X, Y, dev_portion=0.15):
    total = len(X)
    dev_count = int(dev_portion * total)
    inds = np.random.permutation(total)
    train_ind = inds[:total-dev_count]
    dev_ind = inds[total-dev_count:]
    return X[train_ind], Y[train_ind], X[dev_ind], Y[dev_ind]

Xtrain, Ytrain, Xdev, Ydev = split_train(np.concatenate([Xtrain_cont, Xtrain_dis], axis=1), Y.astype('int32'))
Xtrain_c = Xtrain[:, :len(continuous_features)].astype('float32')
Xtrain_d = Xtrain[:, len(continuous_features):].astype('int32')
Xdev_c = Xdev[:, :len(continuous_features)].astype('float32')
Xdev_d = Xdev[:, len(continuous_features):].astype('int32')
print 'train size', len(Xtrain), 'dev size', len(Xdev)


train size 758 dev size 133
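
A quick check (a sketch) that the split sizes add up: with dev_portion=0.15 and 891 training rows, we expect int(0.15 * 891) = 133 dev examples and the remaining 758 for training, matching the counts printed above:

In [ ]:
total = len(Xtrain) + len(Xdev)
print total, int(0.15 * total)  # 891 133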

In [8]:
# make model
from theano import function, tensor as T
from pystacks.layers.container import Sequential, Parallel
from pystacks.layers.lookup import LookupTable
from pystacks.layers.common import *
from pystacks.transformer import UnitNorm

emb_size = 5

# we'll need one lookup table per discrete feature
lookups = [LookupTable(vocab_size=len(vocabs[n]), embedding_size=emb_size, E_transformer=UnitNorm()) for n in discrete_features]

Xdiscrete_sym = T.imatrix()
discrete_net = Parallel(lookups)
discrete_feat = discrete_net.forward(Xdiscrete_sym)
debug_discrete = function([Xdiscrete_sym], discrete_feat)


Couldn't import dot_parser, loading of dot files will not be possible.
Using gpu device 0: GeForce GTX 760
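
Parallel routes column i of the integer input matrix to lookup table i and concatenates the resulting embeddings along the feature axis, so with three discrete features and emb_size = 5, each output row has 3 * 5 = 15 columns (the (5, 15) shape printed below). A minimal numpy sketch of that behavior, with made-up random embedding matrices standing in for the learned ones:

In [ ]:
def toy_parallel_lookup(Es, X_d):
    # Es[i]: embedding matrix for feature i; X_d[:, i]: integer ids for feature i
    return np.concatenate([E[X_d[:, i]] for i, E in enumerate(Es)], axis=1)

Es = [np.random.randn(len(vocabs[n]), emb_size) for n in discrete_features]
print toy_parallel_lookup(Es, Xtrain_d[:5]).shape  # (5, 15)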

In [9]:
from pprint import pprint
pprint(discrete_net)


{'components': [{'E': LookupTable1.E,
                 'advanced_indexing': True,
                 'embedding_size': 5,
                 'n_in': 1,
                 'n_out': 5,
                 'name': 'LookupTable1',
                 'normalize_E': <theano.compile.function_module.Function object at 0x10d152750>,
                 'params': {'LookupTable1.E': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.RandomUniform object at 0x10d0d7050>,
                                               'regularizer': None,
                                               'transformer': <pystacks.transformer.UnitNorm object at 0x10ceecf90>,
                                               'var': LookupTable1.E}},
                 'vocab_size': 3,
                 'window_size': 1},
                {'E': LookupTable2.E,
                 'advanced_indexing': True,
                 'embedding_size': 5,
                 'n_in': 1,
                 'n_out': 5,
                 'name': 'LookupTable2',
                 'normalize_E': <theano.compile.function_module.Function object at 0x10d1aa3d0>,
                 'params': {'LookupTable2.E': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.RandomUniform object at 0x10d13d7d0>,
                                               'regularizer': None,
                                               'transformer': <pystacks.transformer.UnitNorm object at 0x10d13d5d0>,
                                               'var': LookupTable2.E}},
                 'vocab_size': 2,
                 'window_size': 1},
                {'E': LookupTable3.E,
                 'advanced_indexing': True,
                 'embedding_size': 5,
                 'n_in': 1,
                 'n_out': 5,
                 'name': 'LookupTable3',
                 'normalize_E': <theano.compile.function_module.Function object at 0x10d166c50>,
                 'params': {'LookupTable3.E': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.RandomUniform object at 0x10d1aa690>,
                                               'regularizer': None,
                                               'transformer': <pystacks.transformer.UnitNorm object at 0x10d1aa490>,
                                               'var': LookupTable3.E}},
                 'vocab_size': 9,
                 'window_size': 1}],
 'name': 'Parallel1',
 'params': {'Parallel1.LookupTable1.E': {'grad_transformer': None,
                                         'initializer': <pystacks.initializer.RandomUniform object at 0x10d0d7050>,
                                         'regularizer': None,
                                         'transformer': <pystacks.transformer.UnitNorm object at 0x10ceecf90>,
                                         'var': LookupTable1.E},
            'Parallel1.LookupTable2.E': {'grad_transformer': None,
                                         'initializer': <pystacks.initializer.RandomUniform object at 0x10d13d7d0>,
                                         'regularizer': None,
                                         'transformer': <pystacks.transformer.UnitNorm object at 0x10d13d5d0>,
                                         'var': LookupTable2.E},
            'Parallel1.LookupTable3.E': {'grad_transformer': None,
                                         'initializer': <pystacks.initializer.RandomUniform object at 0x10d1aa690>,
                                         'regularizer': None,
                                         'transformer': <pystacks.transformer.UnitNorm object at 0x10d1aa490>,
                                         'var': LookupTable3.E}}}

In [10]:
print 'discrete input'
print Xtrain_d[:5]
print 'discrete features'
print debug_discrete(Xtrain_d[:5])
print debug_discrete(Xtrain_d[:5]).shape

print 'continuous input'
print Xtrain_c[:5]


discrete input
[[0 0 0]
 [0 0 0]
 [0 0 0]
 [1 1 6]
 [0 1 0]]
discrete features
[[-0.00891427  0.00375485 -0.00686882 -0.00664337 -0.00539214  0.00578529
  -0.00542891  0.00217903 -0.00460813 -0.00530658  0.00304885  0.00125801
   0.00810788  0.0037072  -0.0016283 ]
 [-0.00891427  0.00375485 -0.00686882 -0.00664337 -0.00539214  0.00578529
  -0.00542891  0.00217903 -0.00460813 -0.00530658  0.00304885  0.00125801
   0.00810788  0.0037072  -0.0016283 ]
 [-0.00891427  0.00375485 -0.00686882 -0.00664337 -0.00539214  0.00578529
  -0.00542891  0.00217903 -0.00460813 -0.00530658  0.00304885  0.00125801
   0.00810788  0.0037072  -0.0016283 ]
 [-0.00442762 -0.00572807  0.00214834  0.00022232  0.00301373 -0.00344326
  -0.00872849  0.00492772  0.0076262  -0.0096603  -0.00751393 -0.00476517
  -0.00401722 -0.00803768  0.00660028]
 [-0.00891427  0.00375485 -0.00686882 -0.00664337 -0.00539214 -0.00344326
  -0.00872849  0.00492772  0.0076262  -0.0096603   0.00304885  0.00125801
   0.00810788  0.0037072  -0.0016283 ]]
(5, 15)
continuous input
[[  4.37434834e-15  -4.74545211e-01  -4.73673612e-01]
 [  4.37434834e-15  -4.74545211e-01  -4.73673612e-01]
 [ -1.74679589e+00   3.15480900e+00   7.67629862e-01]
 [  4.37434834e-15   4.32793379e-01  -4.73673612e-01]
 [ -5.38001433e-02  -4.74545211e-01   2.00893331e+00]]

In [11]:
n_in = len(discrete_features) * emb_size + len(continuous_features)  # 3 * 5 + 3 = 18
n_hid1 = 100
n_hid2 = 200
n_out = 2

Xcontinuous_sym = T.fmatrix()
Xin_sym = T.concatenate([discrete_feat, Xcontinuous_sym], axis=1)

net = Sequential([
        LinearLayer(n_in, n_hid1),
        ReLU(),
        Dropout(0.5),
        LinearLayer(n_hid1, n_hid2),
        ReLU(),
        Dropout(0.5),
        LinearLayer(n_hid2, n_out),
        Softmax()
    ])

prob_sym = net.forward(Xin_sym)

debug_prob = function([Xdiscrete_sym, Xcontinuous_sym], prob_sym)

In [12]:
print 'probability estimates'
print debug_prob(Xtrain_d, Xtrain_c)


probability estimates
[[ 0.49239645  0.50760355]
 [ 0.47370665  0.52629335]
 [ 0.50849871  0.49150129]
 ..., 
 [ 0.49916452  0.50083548]
 [ 0.48159895  0.51840105]
 [ 0.47524599  0.52475401]]
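
Because the network ends in a Softmax layer, each row of the probability matrix should sum to 1. A quick check (a sketch, not in the original notebook):

In [ ]:
probs = debug_prob(Xtrain_d, Xtrain_c)
print np.allclose(probs.sum(axis=1), 1.0)  # True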

In [13]:
from pystacks.optimizer import Adagrad
from pystacks.criteria import cross_entropy_loss

Y_sym = T.ivector()
lr_sym = T.fscalar()
loss_sym = cross_entropy_loss(prob_sym, Y_sym, one_hot_num_classes=2)
pred_sym = prob_sym.argmax(axis=1)

optimizer = Adagrad()

updates = net.grad_updates(loss=loss_sym, lr=lr_sym, optimizer=optimizer)
updates += discrete_net.grad_updates(loss=loss_sym, lr=lr_sym, optimizer=optimizer)

f_train = function([Xdiscrete_sym, Xcontinuous_sym, Y_sym, lr_sym], loss_sym, updates=updates)
f_pred = function([Xdiscrete_sym, Xcontinuous_sym], net.forward(Xin_sym, train=False).argmax(axis=1))


applying transform <pystacks.transformer.UnitNorm object at 0x10d13d5d0> to LookupTable2.E
applying transform <pystacks.transformer.UnitNorm object at 0x10ceecf90> to LookupTable1.E
applying transform <pystacks.transformer.UnitNorm object at 0x10d1aa490> to LookupTable3.E

In [14]:
pprint(net)


{'components': [{'W': LinearLayer1.W,
                 'b': LinearLayer1.b,
                 'n_in': 18,
                 'n_out': 100,
                 'name': 'LinearLayer1',
                 'params': {'LinearLayer1.W': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.Xavier object at 0x10d540390>,
                                               'regularizer': None,
                                               'transformer': None,
                                               'var': LinearLayer1.W},
                            'LinearLayer1.b': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.Zero object at 0x10d540490>,
                                               'regularizer': None,
                                               'transformer': None,
                                               'var': LinearLayer1.b}}},
                {'name': 'ReLU1', 'params': {}},
                {'keep_rate': 0.5,
                 'name': 'Dropout1',
                 'params': {},
                 'rng': <theano.tensor.shared_randomstreams.RandomStreams object at 0x10d540550>},
                {'W': LinearLayer2.W,
                 'b': LinearLayer2.b,
                 'n_in': 100,
                 'n_out': 200,
                 'name': 'LinearLayer2',
                 'params': {'LinearLayer2.W': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.Xavier object at 0x10d540810>,
                                               'regularizer': None,
                                               'transformer': None,
                                               'var': LinearLayer2.W},
                            'LinearLayer2.b': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.Zero object at 0x10d540850>,
                                               'regularizer': None,
                                               'transformer': None,
                                               'var': LinearLayer2.b}}},
                {'name': 'ReLU2', 'params': {}},
                {'keep_rate': 0.5,
                 'name': 'Dropout2',
                 'params': {},
                 'rng': <theano.tensor.shared_randomstreams.RandomStreams object at 0x10d5404d0>},
                {'W': LinearLayer3.W,
                 'b': LinearLayer3.b,
                 'n_in': 200,
                 'n_out': 2,
                 'name': 'LinearLayer3',
                 'params': {'LinearLayer3.W': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.Xavier object at 0x10d540b10>,
                                               'regularizer': None,
                                               'transformer': None,
                                               'var': LinearLayer3.W},
                            'LinearLayer3.b': {'grad_transformer': None,
                                               'initializer': <pystacks.initializer.Zero object at 0x10d540b50>,
                                               'regularizer': None,
                                               'transformer': None,
                                               'var': LinearLayer3.b}}},
                {'name': 'Softmax1', 'params': {}}],
 'name': 'Sequential1',
 'params': {'Sequential1.LinearLayer1.W': {'grad_transformer': None,
                                           'initializer': <pystacks.initializer.Xavier object at 0x10d540390>,
                                           'regularizer': None,
                                           'transformer': None,
                                           'var': LinearLayer1.W},
            'Sequential1.LinearLayer1.b': {'grad_transformer': None,
                                           'initializer': <pystacks.initializer.Zero object at 0x10d540490>,
                                           'regularizer': None,
                                           'transformer': None,
                                           'var': LinearLayer1.b},
            'Sequential1.LinearLayer2.W': {'grad_transformer': None,
                                           'initializer': <pystacks.initializer.Xavier object at 0x10d540810>,
                                           'regularizer': None,
                                           'transformer': None,
                                           'var': LinearLayer2.W},
            'Sequential1.LinearLayer2.b': {'grad_transformer': None,
                                           'initializer': <pystacks.initializer.Zero object at 0x10d540850>,
                                           'regularizer': None,
                                           'transformer': None,
                                           'var': LinearLayer2.b},
            'Sequential1.LinearLayer3.W': {'grad_transformer': None,
                                           'initializer': <pystacks.initializer.Xavier object at 0x10d540b10>,
                                           'regularizer': None,
                                           'transformer': None,
                                           'var': LinearLayer3.W},
            'Sequential1.LinearLayer3.b': {'grad_transformer': None,
                                           'initializer': <pystacks.initializer.Zero object at 0x10d540b50>,
                                           'regularizer': None,
                                           'transformer': None,
                                           'var': LinearLayer3.b}}}

In [15]:
print Xtest_dis[:5]


[[0 0 0]
 [0 1 0]
 [2 0 0]
 [0 0 0]
 [0 1 0]]

In [16]:
max_epoch = 200
lr = 3e-3

best_acc = 0
best_pred = None

train_accs, dev_accs = [], []

for epoch in xrange(max_epoch + 1):
    # full-batch gradient step over the entire training set
    loss = f_train(Xtrain_d, Xtrain_c, Ytrain, lr)
    train_pred = f_pred(Xtrain_d, Xtrain_c)
    dev_pred = f_pred(Xdev_d, Xdev_c)
    train_acc = np.mean(Ytrain == train_pred)
    dev_acc = np.mean(Ydev == dev_pred)
    train_accs.append(train_acc)
    dev_accs.append(dev_acc)
    
    if epoch % 10 == 0:
        print '*' * 10 + ' epoch', epoch, 'loss', loss, 'train', train_acc, 'dev', dev_acc
    
    if dev_acc > best_acc:
        print 'new best', dev_acc, 'at epoch', epoch
        best_acc = dev_acc
        # keep the test-set predictions from the epoch with the best dev accuracy
        best_pred = f_pred(Xtest_dis.astype('int32'), Xtest_cont.astype('float32'))


********** epoch 0 loss 0.702873882279 train 0.68073878628 dev 0.654135338346
new best 0.654135338346 at epoch 0
********** epoch 10 loss 0.598257507762 train 0.704485488127 dev 0.609022556391
new best 0.661654135338 at epoch 14
new best 0.669172932331 at epoch 17
new best 0.676691729323 at epoch 18
new best 0.684210526316 at epoch 19
********** epoch 20 loss 0.542930591252 train 0.774406332454 dev 0.699248120301
new best 0.699248120301 at epoch 20
new best 0.714285714286 at epoch 21
new best 0.721804511278 at epoch 22
new best 0.736842105263 at epoch 23
new best 0.744360902256 at epoch 25
new best 0.751879699248 at epoch 27
********** epoch 30 loss 0.502081304063 train 0.803430079156 dev 0.744360902256
new best 0.759398496241 at epoch 33
new best 0.766917293233 at epoch 35
new best 0.774436090226 at epoch 37
new best 0.781954887218 at epoch 38
********** epoch 40 loss 0.477435899875 train 0.811345646438 dev 0.781954887218
new best 0.834586466165 at epoch 43
********** epoch 50 loss 0.463049583171 train 0.816622691293 dev 0.834586466165
********** epoch 60 loss 0.453783046446 train 0.817941952507 dev 0.834586466165
new best 0.84962406015 at epoch 64
********** epoch 70 loss 0.451658676968 train 0.81926121372 dev 0.84962406015
********** epoch 80 loss 0.436902627369 train 0.824538258575 dev 0.842105263158
********** epoch 90 loss 0.442674689335 train 0.824538258575 dev 0.842105263158
********** epoch 100 loss 0.435655721078 train 0.824538258575 dev 0.842105263158
********** epoch 110 loss 0.437657104915 train 0.824538258575 dev 0.842105263158
********** epoch 120 loss 0.429776729134 train 0.828496042216 dev 0.842105263158
********** epoch 130 loss 0.427552891383 train 0.828496042216 dev 0.842105263158
********** epoch 140 loss 0.422678638153 train 0.82981530343 dev 0.842105263158
********** epoch 150 loss 0.429531928368 train 0.828496042216 dev 0.842105263158
********** epoch 160 loss 0.426880521931 train 0.828496042216 dev 0.842105263158
********** epoch 170 loss 0.428408942486 train 0.825857519789 dev 0.842105263158
********** epoch 180 loss 0.422503900166 train 0.827176781003 dev 0.842105263158
********** epoch 190 loss 0.419565773477 train 0.827176781003 dev 0.842105263158
********** epoch 200 loss 0.423921467956 train 0.828496042216 dev 0.842105263158

In [17]:
import matplotlib.pylab as P
fig, ax = P.subplots()
ax.plot(train_accs, label='train', color='b')
ax.plot(dev_accs, label='dev', color='r')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.set_title('learning curve')
ax.legend(loc='lower right')


Out[17]:
<matplotlib.legend.Legend at 0x10f711e90>

In [18]:
with open('test.csv') as f_in, open('test.pred.csv', 'wb') as f_out:
    reader = csv.reader(f_in)
    writer = csv.writer(f_out)
    header = reader.next()
    writer.writerow(('PassengerId', 'Survived'))
    for row, survived in zip(reader, best_pred):
        writer.writerow((row[header.index('PassengerId')], survived))

In [19]:
!wc -l 'test.pred.csv'
!head 'test.pred.csv'


     419 test.pred.csv
PassengerId,Survived
892,0
893,0
894,0
895,0
896,1
897,0
898,1
899,0
900,1
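
As a final sanity check (a sketch), the submission should contain one prediction per test row plus a header: the test set has 418 passengers, so 418 + 1 = 419 lines, matching the wc output above:

In [ ]:
print len(best_pred)  # 418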

In [ ]: