In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
In [2]:
!curl -O https://raw.githubusercontent.com/DJCordhose/deep-learning-crash-course-notebooks/master/data/insurance-customers-1500.csv
In [0]:
df = pd.read_csv('./insurance-customers-1500.csv', sep=';')
y = df['group']
df.drop('group', axis='columns', inplace=True)
X = df.values  # df.as_matrix() was removed in pandas 1.0
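Before splitting, a quick look at the features and the class balance helps verify the download worked (a minimal sketch; the column names are whatever the CSV header defines):
print(df.head())
print(y.value_counts())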
In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
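Because stratify=y is passed, both splits keep the same class proportions, which can be checked directly (sketch):
print(y_train.value_counts(normalize=True))
print(y_test.value_counts(normalize=True))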
In [6]:
from tensorflow.keras.layers import (Dense, Dropout,
                                     BatchNormalization, Activation)
dropout = 0.6
model = keras.Sequential()
# reduce capacity by decreasing number of neurons
model.add(Dense(500, name='hidden1', input_dim=3))
# model.add(BatchNormalization())
model.add(Activation('relu'))
# model.add(Dropout(dropout))
model.add(Dense(500, name='hidden2'))
# model.add(BatchNormalization())
model.add(Activation('relu'))
# model.add(Dropout(dropout))
model.add(Dense(3, name='softmax', activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
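For reference, this is what the model looks like with both commented-out regularizers enabled, following the Dense → BatchNormalization → Activation → Dropout order hinted at above (a sketch, not part of the run below):
model_reg = keras.Sequential([
    Dense(500, name='hidden1', input_dim=3),
    BatchNormalization(),   # stabilizes activations between layers
    Activation('relu'),
    Dropout(dropout),       # randomly drops 60% of units during training
    Dense(500, name='hidden2'),
    BatchNormalization(),
    Activation('relu'),
    Dropout(dropout),
    Dense(3, name='softmax', activation='softmax'),
])
model_reg.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])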
In [7]:
# the batch size changes the training dynamics (and thus how the model
# over- or underfits), but reducing it also lowers memory requirements
BATCH_SIZE = 1000
# reduce this based on what you see in the training history,
# or stop automatically (see the early-stopping sketch after this cell)
EPOCHS = 10000
%time history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, verbose=0)
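Instead of hand-tuning EPOCHS, an EarlyStopping callback can end training once the validation loss stops improving; a sketch (the patience value is an arbitrary assumption):
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',          # watch the validation loss
    patience=100,                # tolerate 100 epochs without improvement
    restore_best_weights=True)   # roll back to the best epoch
# history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
#                     validation_split=0.2, callbacks=[early_stop], verbose=0)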
In [8]:
train_loss, train_accuracy = model.evaluate(X_train, y_train, batch_size=BATCH_SIZE)
train_accuracy
Out[8]:
In [9]:
def plot_history(history, samples=100):
    epochs = history.params['epochs']
    # metric keys are 'acc'/'val_acc' in older Keras,
    # 'accuracy'/'val_accuracy' in TF 2
    acc = history.history.get('accuracy', history.history.get('acc'))
    val_acc = history.history.get('val_accuracy', history.history.get('val_acc'))
    # plot only every n-th epoch to keep the curves readable
    every_sample = max(int(epochs / samples), 1)
    acc = pd.DataFrame(acc).iloc[::every_sample, :]
    val_acc = pd.DataFrame(val_acc).iloc[::every_sample, :]
    fig, ax = plt.subplots(figsize=(20, 5))
    ax.plot(acc, 'bo', label='Training acc')
    ax.plot(val_acc, 'b', label='Validation acc')
    ax.set_title('Training and validation accuracy')
    ax.legend()

plot_history(history)
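The same plotting pattern applies to the loss curves, which often reveal overfitting before the accuracy curves do (a sketch reusing the history object):
loss = pd.DataFrame(history.history['loss'])
val_loss = pd.DataFrame(history.history['val_loss'])
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(loss, 'bo', label='Training loss')
ax.plot(val_loss, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.legend()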
In [10]:
model.predict(np.array([[100, 47, 10]]))
Out[10]:
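predict returns one softmax probability per group; np.argmax converts the probabilities into a class label (sketch):
probabilities = model.predict(np.array([[100, 47, 10]]))
np.argmax(probabilities, axis=1)  # index of the most probable group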
In [11]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_accuracy
Out[11]:
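A confusion matrix gives a more detailed picture than the single accuracy number (a sketch using scikit-learn, already a dependency of this notebook):
from sklearn.metrics import confusion_matrix
y_pred = np.argmax(model.predict(X_test), axis=1)
confusion_matrix(y_test, y_pred)  # rows: true group, columns: predicted group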