Installing packages


In [1]:
# Installing Theano
# pip install --upgrade --no-deps git+https://github.com/Theano/Theano.git

# Installing Tensorflow
# pip install tensorflow

# Installing Keras
# pip install --upgrade keras
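
To verify the installation, the packages can be imported and their versions printed (a minimal sketch; it assumes TensorFlow is the Keras backend, as confirmed by the backend message further below):

import tensorflow as tf
import keras
print(tf.__version__)
print(keras.__version__)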

Data Preprocessing


In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('./Artificial_Neural_Networks/Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
  • X (features): columns 3 to 12 of the dataset, i.e. the customer attributes used as inputs.
  • y (target): the Exited column, the value we are trying to predict, i.e. whether the customer stays with or leaves the bank.
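
Before going further, the raw dataframe can be inspected as a quick sanity check (a minimal sketch; it only assumes the CSV loaded above):

print(dataset.columns.tolist())   # column names: the customer features plus the Exited target
print(dataset.head())             # first few customer records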

In [3]:
print (X.shape)
X


(10000, 10)
Out[3]:
array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ..., 
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [4]:
print (y.shape)
y


(10000,)
Out[4]:
array([1, 0, 1, ..., 1, 1, 0])

In [2]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Label-encode the two categorical columns: Geography (index 1) and Gender (index 2)
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
# One-hot encode Geography (note: the categorical_features argument only exists in
# older scikit-learn releases; it has been removed in recent versions)
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()
# Drop the first dummy column to avoid the dummy variable trap
X = X[:, 1:]
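
A rough equivalent of this cell with the newer ColumnTransformer API is sketched below (an alternative, not the original notebook code; it assumes the same column layout, with Geography at index 1 and Gender at index 2):

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np

X = dataset.iloc[:, 3:13].values                          # start from the raw feature matrix
X[:, 2] = LabelEncoder().fit_transform(X[:, 2])           # Gender -> 0/1
ct = ColumnTransformer([('geo', OneHotEncoder(), [1])],   # one-hot encode Geography
                       remainder = 'passthrough',         # keep all other columns
                       sparse_threshold = 0)              # force a dense output array
X = np.asarray(ct.fit_transform(X), dtype = float)        # dummy columns come first
X = X[:, 1:]                                              # drop one dummy column (dummy variable trap)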

In [6]:
print (X.shape)
X


(10000, 11)
Out[6]:
array([[  0.00000000e+00,   0.00000000e+00,   6.19000000e+02, ...,
          1.00000000e+00,   1.00000000e+00,   1.01348880e+05],
       [  0.00000000e+00,   1.00000000e+00,   6.08000000e+02, ...,
          0.00000000e+00,   1.00000000e+00,   1.12542580e+05],
       [  0.00000000e+00,   0.00000000e+00,   5.02000000e+02, ...,
          1.00000000e+00,   0.00000000e+00,   1.13931570e+05],
       ..., 
       [  0.00000000e+00,   0.00000000e+00,   7.09000000e+02, ...,
          0.00000000e+00,   1.00000000e+00,   4.20855800e+04],
       [  1.00000000e+00,   0.00000000e+00,   7.72000000e+02, ...,
          1.00000000e+00,   0.00000000e+00,   9.28885200e+04],
       [  0.00000000e+00,   0.00000000e+00,   7.92000000e+02, ...,
          1.00000000e+00,   0.00000000e+00,   3.81907800e+04]])

In [7]:
print (y.shape)
y


(10000,)
Out[7]:
array([1, 0, 1, ..., 1, 1, 0])

In [3]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
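
The 80/20 split should leave 8,000 training rows and 2,000 test rows, matching the 8000-sample epochs and the 2,000-sample confusion matrix later on; a quick check (minimal sketch):

print(X_train.shape, X_test.shape)   # expected: (8000, 11) (2000, 11)
print(y_train.shape, y_test.shape)   # expected: (8000,) (2000,)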

In [4]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

Building an ANN


In [5]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense


Using TensorFlow backend.

In [11]:
# Initialising the ANN
classifier = Sequential()

In [12]:
# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))

# Adding the second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))

# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
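
Optionally, the architecture can be checked with Keras' built-in summary; with the layer sizes above this gives (11+1)*6 = 72, (6+1)*6 = 42 and (6+1)*1 = 7 trainable weights, 121 in total:

classifier.summary()   # prints the three Dense layers and their parameter counts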

In [13]:
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)


Epoch 1/100
8000/8000 [==============================] - 2s - loss: 0.4849 - acc: 0.7957     
Epoch 2/100
8000/8000 [==============================] - 2s - loss: 0.4271 - acc: 0.7960     
Epoch 3/100
8000/8000 [==============================] - 2s - loss: 0.4208 - acc: 0.8016     
Epoch 4/100
8000/8000 [==============================] - 2s - loss: 0.4175 - acc: 0.8232     
Epoch 5/100
8000/8000 [==============================] - 2s - loss: 0.4155 - acc: 0.8285     
Epoch 6/100
8000/8000 [==============================] - 2s - loss: 0.4134 - acc: 0.8309     
Epoch 7/100
8000/8000 [==============================] - 2s - loss: 0.4121 - acc: 0.8317     
Epoch 8/100
8000/8000 [==============================] - 2s - loss: 0.4107 - acc: 0.8334     
Epoch 9/100
8000/8000 [==============================] - 2s - loss: 0.4103 - acc: 0.8346     
Epoch 10/100
8000/8000 [==============================] - 2s - loss: 0.4091 - acc: 0.8340     
Epoch 11/100
8000/8000 [==============================] - 2s - loss: 0.4078 - acc: 0.8350     
Epoch 12/100
8000/8000 [==============================] - 2s - loss: 0.4072 - acc: 0.8340     
Epoch 13/100
8000/8000 [==============================] - 2s - loss: 0.4065 - acc: 0.8344     
Epoch 14/100
8000/8000 [==============================] - 2s - loss: 0.4065 - acc: 0.8349     
Epoch 15/100
8000/8000 [==============================] - 2s - loss: 0.4057 - acc: 0.8355     
Epoch 16/100
8000/8000 [==============================] - 2s - loss: 0.4050 - acc: 0.8340     
Epoch 17/100
8000/8000 [==============================] - 2s - loss: 0.4049 - acc: 0.8354     
Epoch 18/100
8000/8000 [==============================] - 2s - loss: 0.4040 - acc: 0.8332     
Epoch 19/100
8000/8000 [==============================] - 2s - loss: 0.4028 - acc: 0.8352     
Epoch 20/100
8000/8000 [==============================] - 2s - loss: 0.4022 - acc: 0.8356     
Epoch 21/100
8000/8000 [==============================] - 2s - loss: 0.4013 - acc: 0.8360     
Epoch 22/100
8000/8000 [==============================] - 2s - loss: 0.4007 - acc: 0.8352     
Epoch 23/100
8000/8000 [==============================] - 2s - loss: 0.3999 - acc: 0.8364     
Epoch 24/100
8000/8000 [==============================] - 2s - loss: 0.3999 - acc: 0.8361     
Epoch 25/100
8000/8000 [==============================] - 2s - loss: 0.3990 - acc: 0.8344     
Epoch 26/100
8000/8000 [==============================] - 2s - loss: 0.3985 - acc: 0.8359     
Epoch 27/100
8000/8000 [==============================] - 2s - loss: 0.3977 - acc: 0.8354     
Epoch 28/100
8000/8000 [==============================] - 2s - loss: 0.3975 - acc: 0.8351     
Epoch 29/100
8000/8000 [==============================] - 2s - loss: 0.3975 - acc: 0.8344     
Epoch 30/100
8000/8000 [==============================] - 2s - loss: 0.3975 - acc: 0.8350     
Epoch 31/100
8000/8000 [==============================] - 2s - loss: 0.3967 - acc: 0.8354     
Epoch 32/100
8000/8000 [==============================] - 2s - loss: 0.3965 - acc: 0.8342     
Epoch 33/100
8000/8000 [==============================] - 2s - loss: 0.3964 - acc: 0.8354     
Epoch 34/100
8000/8000 [==============================] - 2s - loss: 0.3964 - acc: 0.8359     
Epoch 35/100
8000/8000 [==============================] - 2s - loss: 0.3962 - acc: 0.8356     
Epoch 36/100
8000/8000 [==============================] - 2s - loss: 0.3958 - acc: 0.8367     
Epoch 37/100
8000/8000 [==============================] - 2s - loss: 0.3953 - acc: 0.8364     
Epoch 38/100
8000/8000 [==============================] - 2s - loss: 0.3958 - acc: 0.8355     
Epoch 39/100
8000/8000 [==============================] - 2s - loss: 0.3954 - acc: 0.8380     
Epoch 40/100
8000/8000 [==============================] - 2s - loss: 0.3954 - acc: 0.8366     
Epoch 41/100
8000/8000 [==============================] - 2s - loss: 0.3954 - acc: 0.8340     
Epoch 42/100
8000/8000 [==============================] - 2s - loss: 0.3952 - acc: 0.8362     
Epoch 43/100
8000/8000 [==============================] - 2s - loss: 0.3947 - acc: 0.8350     
Epoch 44/100
8000/8000 [==============================] - 2s - loss: 0.3951 - acc: 0.8362     
Epoch 45/100
8000/8000 [==============================] - 2s - loss: 0.3945 - acc: 0.8356     
Epoch 46/100
8000/8000 [==============================] - 2s - loss: 0.3949 - acc: 0.8354     
Epoch 47/100
8000/8000 [==============================] - 2s - loss: 0.3952 - acc: 0.8366     
Epoch 48/100
8000/8000 [==============================] - 2s - loss: 0.3950 - acc: 0.8377     
Epoch 49/100
8000/8000 [==============================] - 2s - loss: 0.3942 - acc: 0.8359     
Epoch 50/100
8000/8000 [==============================] - 2s - loss: 0.3944 - acc: 0.8357     
Epoch 51/100
8000/8000 [==============================] - 2s - loss: 0.3947 - acc: 0.8359     
Epoch 52/100
8000/8000 [==============================] - 3s - loss: 0.3944 - acc: 0.8379     
Epoch 53/100
8000/8000 [==============================] - 3s - loss: 0.3946 - acc: 0.8365     
Epoch 54/100
8000/8000 [==============================] - 3s - loss: 0.3946 - acc: 0.8366     
Epoch 55/100
8000/8000 [==============================] - 2s - loss: 0.3943 - acc: 0.8356     
Epoch 56/100
8000/8000 [==============================] - 2s - loss: 0.3944 - acc: 0.8352     
Epoch 57/100
8000/8000 [==============================] - 2s - loss: 0.3948 - acc: 0.8371     
Epoch 58/100
8000/8000 [==============================] - 3s - loss: 0.3940 - acc: 0.8371     
Epoch 59/100
8000/8000 [==============================] - 3s - loss: 0.3944 - acc: 0.8361     
Epoch 60/100
8000/8000 [==============================] - 3s - loss: 0.3941 - acc: 0.8361     
Epoch 61/100
8000/8000 [==============================] - 2s - loss: 0.3942 - acc: 0.8367     
Epoch 62/100
8000/8000 [==============================] - 2s - loss: 0.3933 - acc: 0.8372     
Epoch 63/100
8000/8000 [==============================] - 2s - loss: 0.3940 - acc: 0.8366     
Epoch 64/100
8000/8000 [==============================] - 2s - loss: 0.3937 - acc: 0.8362     
Epoch 65/100
8000/8000 [==============================] - 2s - loss: 0.3938 - acc: 0.8365     
Epoch 66/100
8000/8000 [==============================] - 2s - loss: 0.3929 - acc: 0.8377     
Epoch 67/100
8000/8000 [==============================] - 2s - loss: 0.3934 - acc: 0.8361     
Epoch 68/100
8000/8000 [==============================] - 2s - loss: 0.3936 - acc: 0.8350     
Epoch 69/100
8000/8000 [==============================] - 2s - loss: 0.3930 - acc: 0.8370     
Epoch 70/100
8000/8000 [==============================] - 2s - loss: 0.3922 - acc: 0.8376     
Epoch 71/100
8000/8000 [==============================] - 2s - loss: 0.3921 - acc: 0.8374     
Epoch 72/100
8000/8000 [==============================] - 2s - loss: 0.3919 - acc: 0.8381     
Epoch 73/100
8000/8000 [==============================] - 2s - loss: 0.3918 - acc: 0.8375     
Epoch 74/100
8000/8000 [==============================] - 2s - loss: 0.3910 - acc: 0.8380     
Epoch 75/100
8000/8000 [==============================] - 3s - loss: 0.3907 - acc: 0.8375     
Epoch 76/100
8000/8000 [==============================] - 3s - loss: 0.3902 - acc: 0.8386     
Epoch 77/100
8000/8000 [==============================] - 2s - loss: 0.3897 - acc: 0.8380     
Epoch 78/100
8000/8000 [==============================] - 2s - loss: 0.3892 - acc: 0.8386     
Epoch 79/100
8000/8000 [==============================] - 2s - loss: 0.3873 - acc: 0.8389     
Epoch 80/100
8000/8000 [==============================] - 2s - loss: 0.3848 - acc: 0.8386     
Epoch 81/100
8000/8000 [==============================] - 2s - loss: 0.3818 - acc: 0.8377     
Epoch 82/100
8000/8000 [==============================] - 2s - loss: 0.3790 - acc: 0.8381     
Epoch 83/100
8000/8000 [==============================] - 2s - loss: 0.3749 - acc: 0.8396     
Epoch 84/100
8000/8000 [==============================] - 2s - loss: 0.3701 - acc: 0.8419     
Epoch 85/100
8000/8000 [==============================] - 2s - loss: 0.3665 - acc: 0.8437     
Epoch 86/100
8000/8000 [==============================] - 2s - loss: 0.3641 - acc: 0.8452     
Epoch 87/100
8000/8000 [==============================] - 2s - loss: 0.3616 - acc: 0.8467     
Epoch 88/100
8000/8000 [==============================] - 2s - loss: 0.3598 - acc: 0.8511     
Epoch 89/100
8000/8000 [==============================] - 2s - loss: 0.3583 - acc: 0.8512     
Epoch 90/100
8000/8000 [==============================] - 2s - loss: 0.3576 - acc: 0.8530     
Epoch 91/100
8000/8000 [==============================] - 2s - loss: 0.3565 - acc: 0.8536     
Epoch 92/100
8000/8000 [==============================] - 2s - loss: 0.3554 - acc: 0.8545     
Epoch 93/100
8000/8000 [==============================] - 2s - loss: 0.3534 - acc: 0.8545     
Epoch 94/100
8000/8000 [==============================] - 2s - loss: 0.3527 - acc: 0.8560     
Epoch 95/100
8000/8000 [==============================] - 2s - loss: 0.3514 - acc: 0.8555     
Epoch 96/100
8000/8000 [==============================] - 2s - loss: 0.3498 - acc: 0.8557     
Epoch 97/100
8000/8000 [==============================] - 2s - loss: 0.3496 - acc: 0.8585     
Epoch 98/100
8000/8000 [==============================] - 2s - loss: 0.3489 - acc: 0.8584     
Epoch 99/100
8000/8000 [==============================] - 2s - loss: 0.3479 - acc: 0.8589     
Epoch 100/100
8000/8000 [==============================] - 2s - loss: 0.3478 - acc: 0.8577     
Out[13]:
<keras.callbacks.History at 0x12731e908>

Making predictions and evaluating the model


In [14]:
y_pred = classifier.predict(X_test)   # predicted probability that each test customer leaves
y_pred = (y_pred > 0.5)               # convert probabilities to True/False with a 0.5 threshold

In [15]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [16]:
cm


Out[16]:
array([[1503,   92],
       [ 189,  216]])
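
From this matrix, the test-set accuracy is (1503 + 216) correct predictions out of the 2,000 test samples, i.e. roughly 86%:

accuracy = (cm[0, 0] + cm[1, 1]) / cm.sum()
print(accuracy)   # (1503 + 216) / 2000 = 0.8595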

Evaluating, Improving and Tuning the ANN

  • Using k-fold cross-validation with Keras (via its scikit-learn wrapper)

In [7]:
# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier, batch_size = 10, epochs = 100)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
mean = accuracies.mean()        # average accuracy over the 10 folds
std_dev = accuracies.std()      # standard deviation (spread) of the fold accuracies
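
A short summary of the cross-validation results can then be printed (a minimal sketch using the variables defined above):

print('Mean CV accuracy: {:.4f}'.format(mean))
print('Std of CV accuracy: {:.4f}'.format(std_dev))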

In [ ]:
# Improving the ANN
# Dropout Regularization to reduce overfitting if needed

# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    # classifier.add(Dropout(p = 0.1))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    # classifier.add(Dropout(p = 0.1))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier)
parameters = {'batch_size': [25, 32],
              'epochs': [100, 500],
              'optimizer': ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
  • Dropout regularization: a solution to overfitting (high variance).

    • At each training iteration, some neurons of the network are randomly disabled, so that they cannot become too dependent on one another when learning the correlations in the data.
    • Because a different subset of neurons is dropped each time, the ANN learns several independent correlations in the data; the configuration of active neurons is never the same from one iteration to the next.
    • With the neurons working more independently, the network cannot over-learn the training data, which prevents overfitting.
  • The Dropout() argument (see the sketch after this list):

    • p (called rate in current Keras versions): the fraction of the layer's units to drop/disable at each training update. For example, with 10 neurons and p = 0.1 (10%), on average one neuron is disabled at each update.
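
A minimal sketch of how the dropout layers would look when enabled (Keras 2 syntax, where the argument is rate rather than p; the 0.1 value matches the commented-out lines in the tuning cell above):

from keras.models import Sequential
from keras.layers import Dense, Dropout

classifier = Sequential()
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
classifier.add(Dropout(rate = 0.1))   # randomly disable 10% of this layer's units at each update
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dropout(rate = 0.1))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])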