In [0]:
# install the TensorFlow 2.0 preview build (nightly, GPU)
!pip install -q tf-nightly-gpu-2.0-preview

In [2]:
import tensorflow as tf
print(tf.__version__)


2.0.0-dev20190501

In [0]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [7]:
!curl -O https://raw.githubusercontent.com/DJCordhose/deep-learning-crash-course-notebooks/master/data/insurance-customers-1500.csv


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 26783  100 26783    0     0   112k      0 --:--:-- --:--:-- --:--:--  113k

In [6]:
df = pd.read_csv('./insurance-customers-1500.csv', sep=';')
y = df['group']
df.drop('group', axis='columns', inplace=True)
# .as_matrix() is deprecated in recent pandas; .values returns the same NumPy array
X = df.values
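
A quick look at the features before splitting can serve as a sanity check (a minimal sketch, not part of the original run):

In [0]:
# shape and summary statistics of the three feature columns
print(X.shape)  # expected: (1500, 3)
df.describe()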

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)
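
Since we stratify on y, both splits should keep the class proportions. A quick check (a sketch, not part of the original run):

In [0]:
# verify the stratified split preserved the class distribution
print(y_train.value_counts(normalize=True))
print(y_test.value_counts(normalize=True))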

In [9]:
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Activation

num_categories = 3
dropout = 0.6  # fraction of units dropped (TF 2.x semantics); rates above 0.5 trigger the warning below
model = tf.keras.Sequential()

model.add(Dense(100, name='hidden1', input_dim=3))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(dropout))

model.add(Dense(100, name='hidden2'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(dropout))

model.add(Dense(num_categories, name='softmax', activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()


WARNING: Logging before flag parsing goes to stderr.
W0502 09:55:40.101424 140631522654080 nn_ops.py:4202] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0502 09:55:40.195079 140631522654080 nn_ops.py:4202] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
hidden1 (Dense)              (None, 100)               400       
_________________________________________________________________
activation (Activation)      (None, 100)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 100)               400       
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
hidden2 (Dense)              (None, 100)               10100     
_________________________________________________________________
activation_1 (Activation)    (None, 100)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
softmax (Dense)              (None, 3)                 303       
=================================================================
Total params: 11,603
Trainable params: 11,203
Non-trainable params: 400
_________________________________________________________________
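
The parameter counts follow directly from the layer sizes: hidden1 has 3 × 100 weights plus 100 biases = 400 parameters, hidden2 has 100 × 100 + 100 = 10,100, and the softmax layer has 100 × 3 + 3 = 303. Each BatchNormalization layer adds 4 × 100 = 400 parameters, of which the 200 moving statistics (mean and variance) are not trained, which explains the 400 non-trainable parameters in total.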

We also cap training at 2,000 epochs, because we have seen that nothing improves beyond that point (a callback-based alternative is sketched after the training cell).


In [10]:
%%time 

BATCH_SIZE=1000
EPOCHS = 2000

history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, verbose=0)


CPU times: user 24.1 s, sys: 1.87 s, total: 26 s
Wall time: 19.9 s
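
Instead of a fixed epoch budget, an EarlyStopping callback could stop training once the validation loss stops improving (a sketch, not used in the run above):

In [0]:
# alternative: stop when validation loss has not improved for 100 epochs
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=100, restore_best_weights=True)
# history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
#                     validation_split=0.2, verbose=0, callbacks=[early_stop])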

In [11]:
train_loss, train_accuracy = model.evaluate(X_train, y_train, batch_size=BATCH_SIZE)
train_loss, train_accuracy


900/900 [==============================] - 0s 69us/sample - loss: 0.5535 - accuracy: 0.7967
Out[11]:
(0.5535033345222473, 0.7966667)

In [12]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_loss, test_accuracy


600/600 [==============================] - 0s 109us/sample - loss: 0.5861 - accuracy: 0.7783
Out[12]:
(0.5860760807991028, 0.7783333)
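
To see where the roughly 78% test accuracy comes from, a confusion matrix is instructive (a sketch using scikit-learn, which is already imported above for the split):

In [0]:
from sklearn.metrics import confusion_matrix

# predicted class = argmax over the softmax probabilities
y_pred = np.argmax(model.predict(X_test), axis=1)
confusion_matrix(y_test, y_pred)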

In [15]:
# plt.yscale('log')
plt.ylabel("accuracy")
plt.xlabel("epochs")

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])

plt.legend(["Accuracy", "Validation Accuracy"])


Out[15]:
<matplotlib.legend.Legend at 0x7fe6c2625a20>

Scores around 80% look good now. There might even be a bit more potential here, but we are not chasing the last percent.


In [0]:
model.save('insurance.h5')

In [17]:
# the saved model is reasonably small, as it has only a little more than 10,000 parameters
!ls -l insurance.h5


-rw-r--r-- 1 root root 188144 May  2 09:57 insurance.h5
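
The HDF5 file stores the architecture together with the weights, so the model can be restored without the defining code (a quick round-trip sketch):

In [0]:
# reload the saved model and verify it reproduces the test score
restored = tf.keras.models.load_model('insurance.h5')
restored.evaluate(X_test, y_test, batch_size=BATCH_SIZE)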