In [0]:
# install the TensorFlow 2.0 preview build (nightly, GPU)
!pip install -q tf-nightly-gpu-2.0-preview

In [2]:
import tensorflow as tf
print(tf.__version__)


2.0.0-dev20190501

In [0]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [7]:
!curl -O https://raw.githubusercontent.com/DJCordhose/deep-learning-crash-course-notebooks/master/data/insurance-customers-1500.csv


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 26783  100 26783    0     0   112k      0 --:--:-- --:--:-- --:--:--  113k

In [6]:
df = pd.read_csv('./insurance-customers-1500.csv', sep=';')
y = df['group']
df.drop('group', axis='columns', inplace=True)
# .as_matrix() is deprecated in recent pandas; .values returns the same NumPy array
X = df.values
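
A quick look at the features before splitting can serve as a sanity check (a minimal sketch, not part of the original run):

In [0]:
# shape and summary statistics of the three feature columns
print(X.shape)  # expected: (1500, 3)
df.describe()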

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)
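
Since we stratify on y, both splits should keep the class proportions. A quick check (a sketch, not part of the original run):

In [0]:
# verify the stratified split preserved the class distribution
print(y_train.value_counts(normalize=True))
print(y_test.value_counts(normalize=True))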

In [9]:
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Activation

num_categories = 3
dropout = 0.6  # fraction of units dropped (TF 2.x semantics); rates above 0.5 trigger the warning below
model = tf.keras.Sequential()

model.add(Dense(100, name='hidden1', input_dim=3))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(dropout))

model.add(Dense(100, name='hidden2'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(dropout))

model.add(Dense(num_categories, name='softmax', activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()


WARNING: Logging before flag parsing goes to stderr.
W0502 09:55:40.101424 140631522654080 nn_ops.py:4202] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0502 09:55:40.195079 140631522654080 nn_ops.py:4202] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
hidden1 (Dense)              (None, 100)               400       
_________________________________________________________________
activation (Activation)      (None, 100)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 100)               400       
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
hidden2 (Dense)              (None, 100)               10100     
_________________________________________________________________
activation_1 (Activation)    (None, 100)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
softmax (Dense)              (None, 3)                 303       
=================================================================
Total params: 11,603
Trainable params: 11,203
Non-trainable params: 400
_________________________________________________________________
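
The parameter counts follow directly from the layer sizes: hidden1 has 3 × 100 weights plus 100 biases = 400 parameters, hidden2 has 100 × 100 + 100 = 10,100, and the softmax layer has 100 × 3 + 3 = 303. Each BatchNormalization layer adds 4 × 100 = 400 parameters, of which the 200 moving statistics (mean and variance) are not trained, which explains the 400 non-trainable parameters in total.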

We also cap training at 2,000 epochs, because we have seen that nothing improves beyond that point (a callback-based alternative is sketched after the training cell).


In [10]:
%%time 

BATCH_SIZE=1000
EPOCHS = 2000

history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, verbose=0)


CPU times: user 24.1 s, sys: 1.87 s, total: 26 s
Wall time: 19.9 s
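
Instead of a fixed epoch budget, an EarlyStopping callback could stop training once the validation loss stops improving (a sketch, not used in the run above):

In [0]:
# alternative: stop when validation loss has not improved for 100 epochs
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=100, restore_best_weights=True)
# history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
#                     validation_split=0.2, verbose=0, callbacks=[early_stop])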

In [11]:
train_loss, train_accuracy = model.evaluate(X_train, y_train, batch_size=BATCH_SIZE)
train_loss, train_accuracy


900/900 [==============================] - 0s 69us/sample - loss: 0.5535 - accuracy: 0.7967
Out[11]:
(0.5535033345222473, 0.7966667)

In [12]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_loss, test_accuracy


600/600 [==============================] - 0s 109us/sample - loss: 0.5861 - accuracy: 0.7783
Out[12]:
(0.5860760807991028, 0.7783333)
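
To see where the roughly 78% test accuracy comes from, a confusion matrix is instructive (a sketch using scikit-learn, which is already imported above for the split):

In [0]:
from sklearn.metrics import confusion_matrix

# predicted class = argmax over the softmax probabilities
y_pred = np.argmax(model.predict(X_test), axis=1)
confusion_matrix(y_test, y_pred)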

In [15]:
# plt.yscale('log')
plt.ylabel("accuracy")
plt.xlabel("epochs")

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])

plt.legend(["Accuracy", "Validation Accuracy"])


Out[15]:
<matplotlib.legend.Legend at 0x7fe6c2625a20>

Scores around 80% look good now. There might even be a bit more potential here, but we are not chasing the last percent.


In [0]:
model.save('insurance.h5')

In [17]:
# the saved model is reasonably small, as it has only a little more than 10,000 parameters
!ls -l insurance.h5


-rw-r--r-- 1 root root 188144 May  2 09:57 insurance.h5
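
The HDF5 file stores the architecture together with the weights, so the model can be restored without the defining code (a quick round-trip sketch):

In [0]:
# reload the saved model and verify it reproduces the test score
restored = tf.keras.models.load_model('insurance.h5')
restored.evaluate(X_test, y_test, batch_size=BATCH_SIZE)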