Pima indians diabetes dataset


In [5]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import time

In [6]:
# 1. Number of times pregnant
# 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test
# 3. Diastolic blood pressure (mm Hg)
# 4. Triceps skin fold thickness (mm)
# 5. 2-Hour serum insulin (mu U/ml)
# 6. Body mass index (weight in kg/(height in m)^2)
# 7. Diabetes pedigree function
# 8. Age (years)
# 9. Class variable (0 or 1)

names = ["#of preg", "gluc_conc", "blood_pressure", "skin_thickness", "insulin_conc",
        "BMI", "DPF", "age", "class"]

df = pd.read_csv('data/pima-indians-diabetes.csv', names=names)

df.head()


Out[6]:
#of preg gluc_conc blood_pressure skin_thickness insulin_conc BMI DPF age class
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1

In [7]:
X = df[df.columns[:-1].values]
y = df["class"].values

In [8]:
scaler = StandardScaler()
X_scale = scaler.fit_transform(X)

In [9]:
def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(8, kernel_initializer='uniform', activation="relu"))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [10]:
np.random.seed(7)

In [11]:
start = time.time()
model = KerasClassifier(build_fn=create_model, epochs=150, batch_size=10, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
results = cross_val_score(model, X_scale, y, cv=10)
print(results.mean())
print(time.time() - start)


0.769600135842
198.7945487499237

In [12]:
from keras.callbacks import Callback

Live ploting with Bokeh


In [13]:
from bokeh.io import push_notebook, output_notebook
from bokeh.layouts import row, widgetbox, column
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import PreText
from bokeh.plotting import figure, show
output_notebook()


Loading BokehJS ...

In [14]:
source = ColumnDataSource(data=dict(x=[], y=[], z=[]))

plot = figure(plot_height=250, plot_width=700)

plot.circle('x', 'y', source=source)
plot.line('x', 'y', source=source, color='red')
plot.circle('x', 'z', source=source)
plot.line('x', 'z', source=source, color='green')
show(plot, notebook_handle=True)

new_data = {
    'x' : [],
    'y' : [],
    'z' : []
}

class TrainingHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.i = 1        

    def on_epoch_end(self, batch, logs={}):        
        self.losses.append(logs.get('loss'))
        new_data['x'] = [self.i]
        new_data['y'] = [logs.get('loss')]
        new_data['z'] = [logs.get('acc')]
        source.stream(new_data, rollover=30)
        # text_input.text = "Progress: " + str(self.i/50.0 * 100)
        push_notebook()
        self.i += 1
        
            
history = TrainingHistory()

def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(8, kernel_initializer='uniform', activation="relu"))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

model = create_model()

model.fit(X_scale, y, epochs=250, batch_size=5, verbose=0, callbacks=[history])


Out[14]:
<keras.callbacks.History at 0x7f657bf2acf8>

In [17]:
from sklearn.model_selection import GridSearchCV

In [18]:
def create_model(optimizer="rmsprop", init="glorot_uniform"):
    model = Sequential()
    model.add(Dense(12, input_dim=X.shape[1], kernel_initializer=init, activation='relu'))
    model.add(Dense(8, kernel_initializer=init, activation='relu'))
    model.add(Dense(1, kernel_initializer=init, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [19]:
model = KerasClassifier(build_fn=create_model, verbose=0)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'uniform', 'normal']
epochs = np.arange(50, 300, 50)
batches = np.array([5, 10, 25, 32])

In [ ]:
param_grid = dict(optimizer=optimizers, epochs=epochs, init=init, batch_size=batches)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_results = grid.fit(X_scale, y)

In [ ]:
print("IT TOOK: {} minutes".format((time.time() - start)/60))

print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
for params, mean_score, scores in grid_results.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

IT TOOK: 128.50399666229885 minutes (Google Cloud, 8 CPU)

Best: 0.778646 using {'batch_size': 5, 'epochs': 150, 'init': 'uniform', 'optimizer': 'rmsprop'}

0.766927 (0.047771) with: {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'rmsprop'} 0.761719 (0.027621) with: {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'adam'} 0.766927 (0.032578) with: {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'rmsprop'} 0.769531 (0.033754) with: {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'adam'} 0.773438 (0.031412) with: {'batch_size': 5, 'epochs': 50, 'init': 'normal', 'optimizer': 'rmsprop'} ........