Pima indians diabetes dataset



In [5]:

    
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import time



In [6]:

    
# 1. Number of times pregnant
# 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test
# 3. Diastolic blood pressure (mm Hg)
# 4. Triceps skin fold thickness (mm)
# 5. 2-Hour serum insulin (mu U/ml)
# 6. Body mass index (weight in kg/(height in m)^2)
# 7. Diabetes pedigree function
# 8. Age (years)
# 9. Class variable (0 or 1)

names = ["#of preg", "gluc_conc", "blood_pressure", "skin_thickness", "insulin_conc",
        "BMI", "DPF", "age", "class"]

df = pd.read_csv('data/pima-indians-diabetes.csv', names=names)

df.head()









    Out[6]:







  
    
      
      #of preg
      gluc_conc
      blood_pressure
      skin_thickness
      insulin_conc
      BMI
      DPF
      age
      class
    
  
  
    
      0
      6
      148
      72
      35
      0
      33.6
      0.627
      50
      1
    
    
      1
      1
      85
      66
      29
      0
      26.6
      0.351
      31
      0
    
    
      2
      8
      183
      64
      0
      0
      23.3
      0.672
      32
      1
    
    
      3
      1
      89
      66
      23
      94
      28.1
      0.167
      21
      0
    
    
      4
      0
      137
      40
      35
      168
      43.1
      2.288
      33
      1



In [7]:

    
X = df[df.columns[:-1].values]
y = df["class"].values



In [8]:

    
scaler = StandardScaler()
X_scale = scaler.fit_transform(X)



In [9]:

    
def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(8, kernel_initializer='uniform', activation="relu"))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model



In [10]:

    
np.random.seed(7)



In [11]:

    
start = time.time()
model = KerasClassifier(build_fn=create_model, epochs=150, batch_size=10, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
results = cross_val_score(model, X_scale, y, cv=10)
print(results.mean())
print(time.time() - start)









    



0.769600135842
198.7945487499237



In [12]:

    
from keras.callbacks import Callback

Live ploting with Bokeh



In [13]:

    
from bokeh.io import push_notebook, output_notebook
from bokeh.layouts import row, widgetbox, column
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import PreText
from bokeh.plotting import figure, show
output_notebook()









    





    
        
        Loading BokehJS ...



In [14]:

    
source = ColumnDataSource(data=dict(x=[], y=[], z=[]))

plot = figure(plot_height=250, plot_width=700)

plot.circle('x', 'y', source=source)
plot.line('x', 'y', source=source, color='red')
plot.circle('x', 'z', source=source)
plot.line('x', 'z', source=source, color='green')
show(plot, notebook_handle=True)

new_data = {
    'x' : [],
    'y' : [],
    'z' : []
}

class TrainingHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.i = 1        

    def on_epoch_end(self, batch, logs={}):        
        self.losses.append(logs.get('loss'))
        new_data['x'] = [self.i]
        new_data['y'] = [logs.get('loss')]
        new_data['z'] = [logs.get('acc')]
        source.stream(new_data, rollover=30)
        # text_input.text = "Progress: " + str(self.i/50.0 * 100)
        push_notebook()
        self.i += 1
        
            
history = TrainingHistory()

def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(8, kernel_initializer='uniform', activation="relu"))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

model = create_model()

model.fit(X_scale, y, epochs=250, batch_size=5, verbose=0, callbacks=[history])









    






    
        
    







    Out[14]:





<keras.callbacks.History at 0x7f657bf2acf8>

Parameter tuning with grid search



In [17]:

    
from sklearn.model_selection import GridSearchCV



In [18]:

    
def create_model(optimizer="rmsprop", init="glorot_uniform"):
    model = Sequential()
    model.add(Dense(12, input_dim=X.shape[1], kernel_initializer=init, activation='relu'))
    model.add(Dense(8, kernel_initializer=init, activation='relu'))
    model.add(Dense(1, kernel_initializer=init, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model



In [19]:

    
model = KerasClassifier(build_fn=create_model, verbose=0)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'uniform', 'normal']
epochs = np.arange(50, 300, 50)
batches = np.array([5, 10, 25, 32])



In [ ]:

    
param_grid = dict(optimizer=optimizers, epochs=epochs, init=init, batch_size=batches)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_results = grid.fit(X_scale, y)



In [ ]:

    
print("IT TOOK: {} minutes".format((time.time() - start)/60))

print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
for params, mean_score, scores in grid_results.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))

IT TOOK: 128.50399666229885 minutes (Google Cloud, 8 CPU)

Best: 0.778646 using {'batch_size': 5, 'epochs': 150, 'init': 'uniform', 'optimizer': 'rmsprop'}

0.766927 (0.047771) with: {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'rmsprop'} 0.761719 (0.027621) with: {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'adam'} 0.766927 (0.032578) with: {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'rmsprop'} 0.769531 (0.033754) with: {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'adam'} 0.773438 (0.031412) with: {'batch_size': 5, 'epochs': 50, 'init': 'normal', 'optimizer': 'rmsprop'} ........

	#of preg	gluc_conc	blood_pressure	skin_thickness	insulin_conc	BMI	DPF	age	class
0	6	148	72	35	0	33.6	0.627	50	1
1	1	85	66	29	0	26.6	0.351	31	0
2	8	183	64	0	0	23.3	0.672	32	1
3	1	89	66	23	94	28.1	0.167	21	0
4	0	137	40	35	168	43.1	2.288	33	1