A Bayesian Approach to the Titanic Data Set

A recent obsession of mine has been Bayesian Neural Networks. We will apply this approach in two ways. First, in a pure NN solution that utilizes dropout. Second, we'll use edward to express our uncertainty over the weights and sample from the posterior.


In [3]:
# core python
from itertools import product
import re

# Data Structures
import pandas as pd
import numpy as np

# Data Visualization
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Prediction
import tensorflow as tf
import edward as ed
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from edward.models import Normal
np.random.seed(606)

The following functions encompass a data cleaning pipeline. The function preproc at the end wraps the rest so that a single function call will return the desired data set.


In [4]:
# Loads the raw training data and produces stratified train/test splits.
def split_and_clean():
    """Read train.csv and return a stratified 75/25 split.

    Returns (X_train, y_train, X_test, y_test) — note the order differs
    from sklearn's usual (X_train, X_test, y_train, y_test).
    """
    features, target = select_features(pd.read_csv('train.csv'))
    X_tr, X_te, y_tr, y_te = train_test_split(
        features, target, test_size=0.25, random_state=606, stratify=target)
    return X_tr, y_tr, X_te, y_te

# Select the features of interest.
def select_features(data):
    """Split the raw Titanic frame into predictors X and target y.

    Cabin and Ticket are excluded from the predictors. Returns (X, y)
    where y is a one-column DataFrame holding Survived.
    """
    keep = ['Pclass', 'Name', 'Sex', 'Age', 'SibSp',
            'Parch', 'Fare', 'Embarked']
    return data[keep], data[['Survived']]

# Fill na's: Fare gets the column mean, Embarked defaults to 'C'.
def fix_na(data):
    """Return `data` with missing Fare replaced by the mean Fare and
    missing Embarked replaced by 'C'. Other columns are untouched."""
    return data.fillna({"Fare": data["Fare"].mean(), "Embarked": "C"})

# Processes categorical data into dummy vars.
def create_dummies(data, cat_vars, cat_types):
    """One-hot encode each column named in cat_vars and stack the results.

    Each column is cast to the matching entry of cat_types before
    binarizing; the encoded blocks appear left-to-right in cat_vars order
    (same output as the original rotate/delete/stack loop).
    """
    encoded = [
        LabelBinarizer().fit_transform(data[var].values.astype(kind))
        for var, kind in zip(cat_vars, cat_types)
    ]
    return np.column_stack(encoded)

# Processes numeric data.
def standardize(data, real_vars):
    """Z-score the columns named in real_vars and return the array.

    The scaler is fit on `data` itself each call (no state is shared
    between calls).
    """
    return StandardScaler().fit_transform(data[real_vars])

# Extract titles from the Name field and create appropriate One Hot Encoded Columns
def extract_titles(data):
    """Return an (n, 17) one-hot array of passenger titles parsed from Name.

    Names look like "Braund, Mr. Owen Harris": the text between the comma
    and the last '.' holds the title. Columns follow known_titles order.

    Fixes vs the original:
    - str.contains(title) matched substrings, so 'Mr' also fired on 'Mrs'
      rows (the one-hot columns were not mutually exclusive). A
      word-boundary regex makes each title match exactly.
    - na=False avoids the NaN -> astype('int') failure the bare `except:`
      used to paper over, so the try/except is gone.
    """
    name_parts = data.Name.str.rsplit(', ', expand=True, n=1)
    titles = name_parts[1].str.rsplit('.', expand=True, n=1)
    known_titles = ['Mr', 'Mrs', 'Miss', 'Master', 'Don', 'Rev', 'Dr', 'Mme', 'Ms',
       'Major', 'Lady', 'Sir', 'Mlle', 'Col', 'Capt', 'the Countess',
       'Jonkheer']
    for title in known_titles:
        pattern = r'\b{}\b'.format(re.escape(title))
        titles[title] = titles[0].str.contains(pattern, na=False).astype('int')
    return titles.drop([0, 1], axis=1).values

# Multilayer Perceptron for filling in ages
def age_model(features=27, n_layers=15, n_hidden=256, dropout=0.25, optimizer=None):
    """Build an MLP regressor used to impute missing Age values.

    Args:
        features: number of input features.
        n_layers: hidden layers added after the input layer.
        n_hidden: units per hidden layer.
        dropout: dropout rate after every hidden layer.
        optimizer: keras optimizer; None builds a fresh Adam. (The old
            default `optimizer=Adam()` was evaluated once at definition
            time, so every model built with the default shared a single
            stateful optimizer object.)
    """
    if optimizer is None:
        optimizer = Adam()
    model = Sequential()
    model.add(Dense(n_hidden, input_shape=(features,), activation='relu',
                    kernel_initializer='random_normal'))
    model.add(Dropout(dropout))
    for _ in range(n_layers):
        model.add(Dense(n_hidden, activation='relu'))
        model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])
    return model

# Train the age model and fill in those missing values in the dataset
def impute_ages(data):
    """Train an MLP on rows with known Age and fill in the missing ages.

    Expects Age to be the LAST column of `data`; returns `data` with the
    missing Age entries replaced in place.

    Bug fix: the original did `data[mask] = ages_predicted`, which
    broadcast the (k, 1) predictions across ENTIRE rows — every feature of
    each age-unknown passenger was overwritten with the predicted age.
    Only the Age column is assigned now.
    """
    missing = np.isnan(data[:, -1].astype('float'))
    known = data[~missing]
    unknown = data[missing]
    y = known[:, -1]
    X = known[:, :-1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=606)
    model = age_model()
    model.fit(X_train, y_train, batch_size=64, epochs=50,
              verbose=0, validation_split=0.2)
    ages_predicted = model.predict(unknown[:, :-1])
    data[missing, -1] = ages_predicted.ravel()
    return data

# Executes the full preprocessing pipeline.
def preproc():
    """Assemble the final train/test design matrices and one-hot targets.

    Returns (X_train, y_train, X_test, y_test). Feature columns are, in
    order: one-hot categoricals, standardized numerics, one-hot titles,
    then raw Age (imputed where missing) — downstream code depends on
    this exact column order.
    """
    # Import Data & Split
    X_train_, y_train, X_test_, y_test = split_and_clean()
    # Fill NAs
    X_train, X_test = fix_na(X_train_), fix_na(X_test_)
    # Preproc Categorical Vars
    cat_vars = ['Pclass', 'Sex', 'Embarked']
    cat_types = ['int', 'str', 'str']
    X_train_cat, X_test_cat = create_dummies(X_train, cat_vars, cat_types), create_dummies(X_test, cat_vars, cat_types)
    # Preprocess Numeric Vars
    # NOTE(review): standardize() fits a separate scaler on each split, so the
    # test features are not on the training scale — confirm this is intended.
    real_vars = ['Fare', 'SibSp', 'Parch']
    X_train_real, X_test_real = standardize(X_train, real_vars), standardize(X_test, real_vars)
    # Extract Titles
    X_train_titles, X_test_titles = extract_titles(X_train), extract_titles(X_test)
    # Recombine
    # NOTE(review): Age comes from X_train_ (pre-fillna) on the train side but
    # X_test (post-fillna) on the test side; fix_na does not touch Age, so the
    # values agree, but the asymmetry looks accidental.
    X_train, X_test = np.column_stack((X_train_cat, X_train_real, X_train_titles, X_train_.Age)), np.column_stack((X_test_cat, X_test_real, X_test_titles, X_test.Age))
    # Fill Missing Ages
    X_train, X_test = impute_ages(X_train), impute_ages(X_test)
    return X_train, np_utils.to_categorical(y_train.values), X_test, np_utils.to_categorical(y_test.values)

Run the preproc pipeline


In [5]:
# Build the train/test design matrices via the full pipeline defined above.
X_train, y_train, X_test, y_test = preproc()

Now we can build a Keras model. The factory function below exposes the architecture choices — depth, width, and dropout rate — as parameters so we can tune them with a grid search later.


In [63]:
def create_model(features=28, n_layers=10, n_hidden=64, dropout=0.3, optimizer=None):
    """Build the survival classifier: an MLP with a 2-unit softmax head.

    Args:
        features: number of input features.
        n_layers: hidden layers added after the input layer.
        n_hidden: units per hidden layer.
        dropout: dropout rate after every hidden layer.
        optimizer: keras optimizer; None builds a fresh Adam. (The old
            default `optimizer=Adam()` was evaluated once at definition
            time, so every model built during grid search shared one
            stateful optimizer object.)
    """
    if optimizer is None:
        optimizer = Adam()
    model = Sequential()
    model.add(Dense(n_hidden, input_shape=(features,), activation='relu',
                    kernel_initializer='random_normal'))
    model.add(Dropout(dropout))
    for _ in range(n_layers):
        model.add(Dense(n_hidden, activation='relu'))
        model.add(Dropout(dropout))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
    return model

The last thing to do is to compile and fit the model.


In [6]:
def fit_model(n_layers=14, n_hidden=64, dropout=0.3, epochs=200):
    """Build a classifier with the given hyperparameters and train it on
    the module-level X_train / y_train, returning the fitted model."""
    clf = create_model(
        features=X_train.shape[1],
        n_layers=n_layers,
        n_hidden=n_hidden,
        dropout=dropout,
    )
    clf.fit(X_train, y_train, epochs=epochs, batch_size=64)
    return clf

# Hyperparameter search space for GridSearchCV (2 x 2 x 2 = 8 candidates).
param_grid = {
    'n_layers' : [5, 10],
    'n_hidden' : [25, 75],
    'dropout'  : [0.25, 0.35]
}

In [7]:
# Wrap the Keras factory so sklearn's GridSearchCV can drive it, then
# exhaustively search param_grid with cross-validation.
model = KerasClassifier(build_fn=create_model, verbose=1)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, y_train)


Epoch 1/10
445/445 [==============================] - 0s - loss: 0.6903 - acc: 0.5865      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6734 - acc: 0.6292     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6555 - acc: 0.6315     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.6310 - acc: 0.6315     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.6073 - acc: 0.6315     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5923 - acc: 0.6315     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5906 - acc: 0.6315     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5668 - acc: 0.6315     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5662 - acc: 0.6315     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5364 - acc: 0.6337     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 0s - loss: 0.6892 - acc: 0.5843     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6779 - acc: 0.5820     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6716 - acc: 0.5865     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.6519 - acc: 0.5865     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.6155 - acc: 0.5910     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.6192 - acc: 0.6112     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5723 - acc: 0.7371     
Epoch 8/10
445/445 [==============================] - ETA: 0s - loss: 0.5158 - acc: 0.906 - 0s - loss: 0.5598 - acc: 0.7708     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5171 - acc: 0.7708     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5161 - acc: 0.7775     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 0s - loss: 0.6847 - acc: 0.5942      
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.6455 - acc: 0.6323     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.6038 - acc: 0.6323     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.5547 - acc: 0.6323     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.5268 - acc: 0.6435     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.5221 - acc: 0.7556     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.4843 - acc: 0.8274     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.4831 - acc: 0.8251     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.4736 - acc: 0.8027     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4553 - acc: 0.8341     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 0s - loss: 0.6752 - acc: 0.6292      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6587 - acc: 0.6315     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6347 - acc: 0.6315     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5733 - acc: 0.6315     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.5541 - acc: 0.6315     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5614 - acc: 0.7303     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5991 - acc: 0.7775     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5678 - acc: 0.7258     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5389 - acc: 0.7978     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5367 - acc: 0.8157     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 0s - loss: 0.6853 - acc: 0.5708      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6653 - acc: 0.5865     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6485 - acc: 0.5865     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.6040 - acc: 0.5865     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.5692 - acc: 0.7483     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5405 - acc: 0.7843     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5601 - acc: 0.7640     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5641 - acc: 0.7708     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5619 - acc: 0.7753     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5630 - acc: 0.7528     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 1s - loss: 0.6688 - acc: 0.6188      
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.6460 - acc: 0.6345     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.6018 - acc: 0.6323     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.5846 - acc: 0.6457     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.5576 - acc: 0.7422     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.5186 - acc: 0.7623     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.5278 - acc: 0.7915     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.5251 - acc: 0.7825     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.5795 - acc: 0.7937     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4717 - acc: 0.8206     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 0s - loss: 0.6426 - acc: 0.6202      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.5609 - acc: 0.7483     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.5336 - acc: 0.7798     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.4329 - acc: 0.8337     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.4409 - acc: 0.8292     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.4244 - acc: 0.8404     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.4042 - acc: 0.8517     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.4165 - acc: 0.8382     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.3905 - acc: 0.8472     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.3824 - acc: 0.8539     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6741 - acc: 0.5843      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6052 - acc: 0.7169     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.5319 - acc: 0.7798     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.4891 - acc: 0.8112     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.4523 - acc: 0.8225     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.4285 - acc: 0.8292     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.4425 - acc: 0.8270     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.4221 - acc: 0.8315     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.4237 - acc: 0.8225     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.4114 - acc: 0.8360     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 0s - loss: 0.6323 - acc: 0.6188      
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.5103 - acc: 0.7623     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.4326 - acc: 0.8229     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.4264 - acc: 0.8184     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.4087 - acc: 0.8206     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.3892 - acc: 0.8498     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.4094 - acc: 0.8520     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.3753 - acc: 0.8475     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.3834 - acc: 0.8408     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.3586 - acc: 0.8475     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6688 - acc: 0.6202     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6120 - acc: 0.6584     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.5453 - acc: 0.7528     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5400 - acc: 0.8045     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.4879 - acc: 0.8022     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.4910 - acc: 0.8022     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5002 - acc: 0.8180     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.4685 - acc: 0.8292     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.4234 - acc: 0.8360     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.4197 - acc: 0.8494     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6834 - acc: 0.5978     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6552 - acc: 0.6202     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6443 - acc: 0.6090     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5367 - acc: 0.7888     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.4944 - acc: 0.7910     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5243 - acc: 0.8180     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5227 - acc: 0.7798     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5466 - acc: 0.7955     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5220 - acc: 0.7910     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.4952 - acc: 0.8000     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 1s - loss: 0.6612 - acc: 0.6323     
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.6452 - acc: 0.6323     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.5418 - acc: 0.7040     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.4899 - acc: 0.8139     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.4979 - acc: 0.8274     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.4822 - acc: 0.8251     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.4705 - acc: 0.8318     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.4357 - acc: 0.8386     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.4601 - acc: 0.8341     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4066 - acc: 0.8430     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6627 - acc: 0.6045      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6382 - acc: 0.6292     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6070 - acc: 0.6315     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5817 - acc: 0.6315     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.5773 - acc: 0.7213     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5979 - acc: 0.7753     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5468 - acc: 0.7910     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5509 - acc: 0.7955     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5670 - acc: 0.8247     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5378 - acc: 0.8202     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6884 - acc: 0.5551      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6757 - acc: 0.5775     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6658 - acc: 0.5888     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.6401 - acc: 0.5820     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.6055 - acc: 0.6045     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.6031 - acc: 0.6921     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5649 - acc: 0.7461     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5677 - acc: 0.7551     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5472 - acc: 0.7843     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5400 - acc: 0.7730     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 1s - loss: 0.6893 - acc: 0.5561      
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.6531 - acc: 0.6502     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.6283 - acc: 0.6839     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.5392 - acc: 0.7511     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.5245 - acc: 0.7713     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.5056 - acc: 0.7668     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.4804 - acc: 0.7758     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.4527 - acc: 0.8206     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.4601 - acc: 0.8004     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4205 - acc: 0.8206     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6661 - acc: 0.6225      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6482 - acc: 0.6315     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6272 - acc: 0.6337     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.6150 - acc: 0.6315     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.5973 - acc: 0.6315     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5943 - acc: 0.6292     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5804 - acc: 0.7573     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5605 - acc: 0.7438     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5727 - acc: 0.7933     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5959 - acc: 0.7798     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6850 - acc: 0.5640     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6817 - acc: 0.5775     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6770 - acc: 0.5843     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.6717 - acc: 0.5798     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.6555 - acc: 0.5843     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.6311 - acc: 0.5843     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.6175 - acc: 0.5978     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.6531 - acc: 0.5978     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.6303 - acc: 0.7146     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.6019 - acc: 0.7438     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 2s - loss: 0.6798 - acc: 0.6031      
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.6520 - acc: 0.6300     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.6309 - acc: 0.6278     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.5925 - acc: 0.6323     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.5566 - acc: 0.6480     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.5555 - acc: 0.7466     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.5352 - acc: 0.7377     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.5234 - acc: 0.7332     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.5171 - acc: 0.7803     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4983 - acc: 0.7758     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6671 - acc: 0.6090     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6018 - acc: 0.6247     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.5534 - acc: 0.7416     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5568 - acc: 0.7685     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.4777 - acc: 0.8067     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.4931 - acc: 0.8247     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.4774 - acc: 0.8135     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.4493 - acc: 0.8360     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.4246 - acc: 0.8292     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.4280 - acc: 0.8315     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 1s - loss: 0.6766 - acc: 0.5820      
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6164 - acc: 0.6697     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.5574 - acc: 0.7708     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5222 - acc: 0.7685     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.4790 - acc: 0.7888     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.4850 - acc: 0.8045     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.4697 - acc: 0.8067     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.4868 - acc: 0.7888     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.4376 - acc: 0.8247     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.4807 - acc: 0.8135     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 1s - loss: 0.6596 - acc: 0.6099     
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.5760 - acc: 0.6390     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.5125 - acc: 0.7982     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.4783 - acc: 0.8161     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.4404 - acc: 0.8229     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.4680 - acc: 0.8206     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.4126 - acc: 0.8161     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.4032 - acc: 0.8206     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.4033 - acc: 0.8117     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4010 - acc: 0.8161     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 3s - loss: 0.6763 - acc: 0.6090     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.6825 - acc: 0.6225     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6125 - acc: 0.6315     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5894 - acc: 0.6315     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.5835 - acc: 0.7258     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.5695 - acc: 0.7820     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5071 - acc: 0.8090     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.5264 - acc: 0.8022     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.4992 - acc: 0.8157     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5414 - acc: 0.8202     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
445/445 [==============================] - 2s - loss: 0.6685 - acc: 0.5596     
Epoch 2/10
445/445 [==============================] - 0s - loss: 0.5795 - acc: 0.7213     
Epoch 3/10
445/445 [==============================] - 0s - loss: 0.6477 - acc: 0.7169     
Epoch 4/10
445/445 [==============================] - 0s - loss: 0.5521 - acc: 0.7596     
Epoch 5/10
445/445 [==============================] - 0s - loss: 0.5827 - acc: 0.7708     
Epoch 6/10
445/445 [==============================] - 0s - loss: 0.6152 - acc: 0.7303     
Epoch 7/10
445/445 [==============================] - 0s - loss: 0.5473 - acc: 0.7753     
Epoch 8/10
445/445 [==============================] - 0s - loss: 0.4890 - acc: 0.8180     
Epoch 9/10
445/445 [==============================] - 0s - loss: 0.5046 - acc: 0.8067     
Epoch 10/10
445/445 [==============================] - 0s - loss: 0.5150 - acc: 0.8090     
 32/445 [=>............................] - ETA: 0sEpoch 1/10
446/446 [==============================] - 2s - loss: 0.6587 - acc: 0.6099     
Epoch 2/10
446/446 [==============================] - 0s - loss: 0.5446 - acc: 0.6861     
Epoch 3/10
446/446 [==============================] - 0s - loss: 0.5667 - acc: 0.7960     
Epoch 4/10
446/446 [==============================] - 0s - loss: 0.5535 - acc: 0.7646     
Epoch 5/10
446/446 [==============================] - 0s - loss: 0.4675 - acc: 0.8274     
Epoch 6/10
446/446 [==============================] - 0s - loss: 0.4688 - acc: 0.8184     
Epoch 7/10
446/446 [==============================] - 0s - loss: 0.4972 - acc: 0.8318     
Epoch 8/10
446/446 [==============================] - 0s - loss: 0.4764 - acc: 0.8094     
Epoch 9/10
446/446 [==============================] - 0s - loss: 0.4689 - acc: 0.8049     
Epoch 10/10
446/446 [==============================] - 0s - loss: 0.4605 - acc: 0.8094     
 32/446 [=>............................] - ETA: 0sEpoch 1/10
668/668 [==============================] - 2s - loss: 0.6271 - acc: 0.6632     
Epoch 2/10
668/668 [==============================] - 0s - loss: 0.4981 - acc: 0.7934     
Epoch 3/10
668/668 [==============================] - 0s - loss: 0.4675 - acc: 0.8054     
Epoch 4/10
668/668 [==============================] - 0s - loss: 0.4176 - acc: 0.8308     
Epoch 5/10
668/668 [==============================] - 0s - loss: 0.4242 - acc: 0.8144     
Epoch 6/10
668/668 [==============================] - 0s - loss: 0.4247 - acc: 0.8338     
Epoch 7/10
668/668 [==============================] - 0s - loss: 0.4162 - acc: 0.8533     
Epoch 8/10
668/668 [==============================] - 0s - loss: 0.4016 - acc: 0.8578     
Epoch 9/10
668/668 [==============================] - 0s - loss: 0.3992 - acc: 0.8488     
Epoch 10/10
668/668 [==============================] - 0s - loss: 0.4004 - acc: 0.8413     

In [ ]:
# Evaluate the best cross-validated model on both splits.
best_model = grid_result.best_estimator_.model
train_score, train_accuracy = best_model.evaluate(X_train, y_train)
test_score, test_accuracy = best_model.evaluate(X_test, y_test)
# Fixed typo in the report string ('Trainng' -> 'Training').
print('Training Score: {0}, Training Accuracy: {1}'.format(train_score, train_accuracy))
print('Test Score: {0}, Test Accuracy: {1}'.format(test_score, test_accuracy))

In [ ]:
# Report the winning hyperparameters and the spread of mean CV train scores.
print(grid_result.best_params_)
plt.hist(grid_result.cv_results_['mean_train_score'])

In [17]:
# Split passengers into age-known and age-unknown subsets.
age_data = pd.read_csv('train.csv')
# Bug fix: `age_data[age_data.isnull()]` applies a frame-shaped mask, which
# NaNs out non-matching CELLS rather than selecting rows. Mask on the Age
# column to actually partition the rows.
age_missing = age_data.Age.isnull()
age_unknown = age_data[age_missing]
age_known = age_data[~age_missing]


Out[17]:
0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
6      54.0
7       2.0
8      27.0
9      14.0
10      4.0
11     58.0
12     20.0
13     39.0
14     14.0
15     55.0
16      2.0
18     31.0
20     35.0
21     34.0
22     15.0
23     28.0
24      8.0
25     38.0
27     19.0
30     40.0
33     66.0
34     28.0
35     42.0
37     21.0
38     18.0
       ... 
856    45.0
857    51.0
858    24.0
860    41.0
861    21.0
862    48.0
864    24.0
865    42.0
866    27.0
867    31.0
869     4.0
870    26.0
871    47.0
872    33.0
873    47.0
874    28.0
875    15.0
876    20.0
877    19.0
879    56.0
880    25.0
881    33.0
882    22.0
883    28.0
884    25.0
885    39.0
886    27.0
887    19.0
889    26.0
890    32.0
Name: Age, Length: 714, dtype: float64

In [ ]:
def preproc_testing():
    """Preprocess test.csv into the SAME feature layout as preproc().

    Bug fix: the original placed a standardized Age among the numeric
    columns, while the training pipeline appends RAW Age as the last
    column (and imputes missing values) — so the trained model received
    misaligned features at prediction time. This version mirrors the
    training column order exactly: one-hot categoricals, standardized
    ['Fare', 'SibSp', 'Parch'], one-hot titles, then raw Age, followed by
    the same NN-based age imputation.
    """
    X = pd.read_csv('test.csv')
    # Fill NAs
    X = fix_na(X)
    # Preproc Categorical Vars
    cat_vars = ['Pclass', 'Sex', 'Embarked']
    cat_types = ['int', 'str', 'str']
    X_cat = create_dummies(X, cat_vars, cat_types)
    # Preprocess Numeric Vars (Age excluded: it stays raw, as in training)
    real_vars = ['Fare', 'SibSp', 'Parch']
    X_real = standardize(X, real_vars)
    # Extract Titles
    X_titles = extract_titles(X)
    # Recombine with raw Age last, matching preproc()
    X_full = np.column_stack((X_cat, X_real, X_titles, X.Age))
    # Fill missing ages with the same imputation model used in training
    return impute_ages(X_full)

In [ ]:
# Preprocess the Kaggle test set and predict with the tuned grid-search model.
testing = preproc_testing()
prediction = grid_result.predict(testing)

In [ ]:
# Assemble the submission frame: PassengerId plus the predicted Survived flag.
submission = pd.DataFrame()
submission['PassengerId'] = pd.read_csv('test.csv').PassengerId
submission['Survived'] = prediction

In [ ]:
# Write the submission file without the index column.
submission.to_csv('keras_titanic.csv', index=False)

In [ ]: