In [5]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import time
In [6]:
# 1. Number of times pregnant
# 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test
# 3. Diastolic blood pressure (mm Hg)
# 4. Triceps skin fold thickness (mm)
# 5. 2-Hour serum insulin (mu U/ml)
# 6. Body mass index (weight in kg/(height in m)^2)
# 7. Diabetes pedigree function
# 8. Age (years)
# 9. Class variable (0 or 1)
names = ["#of preg", "gluc_conc", "blood_pressure", "skin_thickness", "insulin_conc",
"BMI", "DPF", "age", "class"]
df = pd.read_csv('data/pima-indians-diabetes.csv', names=names)
df.head()
Out[6]:
In [7]:
X = df[df.columns[:-1]]
y = df["class"].values
In [8]:
scaler = StandardScaler()
X_scale = scaler.fit_transform(X)
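A quick sanity check (an added cell, not part of the original notebook) that the scaler did what we expect, using only the X_scale array created above:
In [ ]:
# After StandardScaler, each column should have mean ~0 and std ~1.
# X_scale is a plain NumPy array; columns follow the order of names[:-1].
print(np.round(X_scale.mean(axis=0), 3))
print(np.round(X_scale.std(axis=0), 3))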
In [9]:
def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
In [10]:
np.random.seed(7)
In [11]:
start = time.time()
model = KerasClassifier(build_fn=create_model, epochs=150, batch_size=10, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
results = cross_val_score(model, X_scale, y, cv=kfold)
print(results.mean())
print(time.time() - start)
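The mean alone hides how much the estimate varies between folds; a small hedged follow-up cell, reusing the `results` array from the cell above:
In [ ]:
# Per-fold accuracies plus the standard deviation of the 10-fold estimate.
print(results)
print("%.3f (+/- %.3f)" % (results.mean(), results.std()))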
In [12]:
from keras.callbacks import Callback
In [13]:
from bokeh.io import push_notebook, output_notebook
from bokeh.layouts import row, widgetbox, column
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import PreText
from bokeh.plotting import figure, show
output_notebook()
In [14]:
source = ColumnDataSource(data=dict(x=[], y=[], z=[]))
plot = figure(plot_height=250, plot_width=700)
plot.circle('x', 'y', source=source)
plot.line('x', 'y', source=source, color='red')
plot.circle('x', 'z', source=source)
plot.line('x', 'z', source=source, color='green')
show(plot, notebook_handle=True)
new_data = {
    'x': [],
    'y': [],
    'z': []
}
class TrainingHistory(Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.i = 1

    def on_epoch_end(self, epoch, logs={}):
        # Stream the latest loss/accuracy point to the Bokeh plot above.
        self.losses.append(logs.get('loss'))
        new_data['x'] = [self.i]
        new_data['y'] = [logs.get('loss')]
        new_data['z'] = [logs.get('acc')]
        source.stream(new_data, rollover=30)
        # text_input.text = "Progress: " + str(self.i/50.0 * 100)
        push_notebook()
        self.i += 1
history = TrainingHistory()
def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
model = create_model()
model.fit(X_scale, y, epochs=250, batch_size=5, verbose=0, callbacks=[history])
Out[14]:
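As a rough follow-up (an added sketch, not part of the original run), both the fitted model and the loss history recorded by the callback can be inspected directly:
In [ ]:
# Training-set metrics; optimistic compared to the cross-validated estimate
# above, but a quick confirmation that the model learned something.
loss, acc = model.evaluate(X_scale, y, verbose=0)
print("train loss: %.3f, train acc: %.3f" % (loss, acc))
# The callback also kept the full per-epoch loss history.
print(len(history.losses), history.losses[-1])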
In [17]:
from sklearn.model_selection import GridSearchCV
In [18]:
def create_model(optimizer="rmsprop", init="glorot_uniform"):
    model = Sequential()
    model.add(Dense(12, input_dim=X.shape[1], kernel_initializer=init, activation='relu'))
    model.add(Dense(8, kernel_initializer=init, activation='relu'))
    model.add(Dense(1, kernel_initializer=init, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
In [19]:
model = KerasClassifier(build_fn=create_model, verbose=0)
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'uniform', 'normal']
epochs = np.arange(50, 300, 50)
batches = np.array([5, 10, 25, 32])
In [ ]:
start = time.time()  # reset the timer so the next cell reports only the grid search
param_grid = dict(optimizer=optimizers, epochs=epochs, init=init, batch_size=batches)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_results = grid.fit(X_scale, y)
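The grid above is fairly large, which explains the long runtime reported below. A quick count of the candidates (a hedged aside; the total number of fits also depends on the GridSearchCV cv default of the installed scikit-learn version):
In [ ]:
# 2 optimizers x 3 initializers x 5 epoch settings x 4 batch sizes = 120 candidates,
# and each candidate is refit once per cross-validation fold.
from sklearn.model_selection import ParameterGrid
print(len(ParameterGrid(param_grid)), "parameter combinations")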
In [ ]:
print("IT TOOK: {} minutes".format((time.time() - start)/60))
print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
for params, mean_score, scores in grid_results.grid_scores_:
print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
IT TOOK: 128.50399666229885 minutes (Google Cloud, 8 CPU)
Best: 0.778646 using {'batch_size': 5, 'epochs': 150, 'init': 'uniform', 'optimizer': 'rmsprop'}
0.766927 (0.047771) with: {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'rmsprop'}
0.761719 (0.027621) with: {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'adam'}
0.766927 (0.032578) with: {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'rmsprop'}
0.769531 (0.033754) with: {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'adam'}
0.773438 (0.031412) with: {'batch_size': 5, 'epochs': 50, 'init': 'normal', 'optimizer': 'rmsprop'}
........
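Once the search has finished, the winning configuration can be reused directly; a minimal sketch (assuming `grid_results` from the fit above is still in memory, and that GridSearchCV's default refit=True was left in place):
In [ ]:
# With refit=True, GridSearchCV refits the best candidate on the full data,
# so best_estimator_ is ready for predictions.
best = grid_results.best_estimator_
preds = best.predict(X_scale)
print("training accuracy of best model: %.3f" % (preds.flatten() == y).mean())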