In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [2]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [3]:
dataset.head()


Out[3]:
RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited
0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1
1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0
2 3 15619304 Onio 502 France Female 42 8 159660.80 3 1 0 113931.57 1
3 4 15701354 Boni 699 France Female 39 1 0.00 2 0 0 93826.63 0
4 5 15737888 Mitchell 850 Spain Female 43 2 125510.82 1 1 1 79084.10 0

In [4]:
# Encoding categorical data: Geography (column 1) and Gender (column 2) to integer labels
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

In [5]:
X[0:3, :]


Out[5]:
array([[619, 0, 0, 42, 2, 0.0, 1, 1, 1, 101348.88],
       [608, 2, 0, 41, 1, 83807.86, 1, 0, 1, 112542.58],
       [502, 0, 0, 42, 8, 159660.8, 3, 1, 0, 113931.57]], dtype=object)

In [6]:
# One-hot encode the Geography column (index 1)
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()

In [7]:
# Remove one dummy column to avoid the dummy variable trap: [1, 0, 0] can be represented as [0, 0]
X = X[:, 1:]
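
Note: the categorical_features argument above is the old scikit-learn API and has since been removed. A rough modern equivalent of the last two cells (a sketch, assuming scikit-learn >= 0.21 with ColumnTransformer available, applied to the label-encoded X from In [4]; the encoded Geography columns still end up first) would be:

# Sketch only: one-hot encode column 1 (Geography) and drop the first dummy directly,
# so the manual X = X[:, 1:] step above is not needed.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer([('geo', OneHotEncoder(drop='first'), [1])],
                       remainder='passthrough')
X_alt = ct.fit_transform(X)
if hasattr(X_alt, 'toarray'):   # densify if a sparse matrix comes back
    X_alt = X_alt.toarray()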

In [8]:
X[0:2, :]


Out[8]:
array([[  0.00000000e+00,   0.00000000e+00,   6.19000000e+02,
          0.00000000e+00,   4.20000000e+01,   2.00000000e+00,
          0.00000000e+00,   1.00000000e+00,   1.00000000e+00,
          1.00000000e+00,   1.01348880e+05],
       [  0.00000000e+00,   1.00000000e+00,   6.08000000e+02,
          0.00000000e+00,   4.10000000e+01,   1.00000000e+00,
          8.38078600e+04,   1.00000000e+00,   0.00000000e+00,
          1.00000000e+00,   1.12542580e+05]])

In [9]:
# An alternative way to obtain the same features, using pandas instead of the encoders above:
d_X = dataset.iloc[:, 3:13]
d_y = dataset.iloc[:, 13]

In [10]:
d_X = pd.get_dummies(d_X)

In [11]:
d_X.head()


Out[11]:
CreditScore Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Geography_France Geography_Germany Geography_Spain Gender_Female Gender_Male
0 619 42 2 0.00 1 1 1 101348.88 1 0 0 1 0
1 608 41 1 83807.86 1 0 1 112542.58 0 0 1 1 0
2 502 42 8 159660.80 3 1 0 113931.57 1 0 0 1 0
3 699 39 1 0.00 2 0 0 93826.63 1 0 0 1 0
4 850 43 2 125510.82 1 1 1 79084.10 0 0 1 1 0

In [12]:
# Drop one dummy column per categorical variable to avoid the dummy variable trap
d_X.drop(['Geography_France', 'Gender_Female'], axis=1, inplace=True)

In [13]:
d_X.head()


Out[13]:
CreditScore Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Geography_Germany Geography_Spain Gender_Male
0 619 42 2 0.00 1 1 1 101348.88 0 0 0
1 608 41 1 83807.86 1 0 1 112542.58 0 1 0
2 502 42 8 159660.80 3 1 0 113931.57 0 0 0
3 699 39 1 0.00 2 0 0 93826.63 0 0 0
4 850 43 2 125510.82 1 1 1 79084.10 0 1 0

In [14]:
d_X.shape


Out[14]:
(10000, 11)

In [15]:
X.shape


Out[15]:
(10000, 11)

Train Test Split


In [16]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [17]:
# Use the pandas-encoded features d_X instead of X; revert to the split above if this causes problems later
X_train, X_test, y_train, y_test = train_test_split(d_X, y, test_size = 0.2, random_state = 0)

Feature scaling


In [18]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
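
A quick sanity check: after scaling, each training column should have roughly zero mean and unit standard deviation.

# Optional check: per-column means ~0 and standard deviations ~1 after scaling
print(X_train.mean(axis=0).round(2))
print(X_train.std(axis=0).round(2))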

Part 2 - Now let's make the ANN!


In [64]:
import keras
from keras.models import Sequential
from keras.layers import Dense

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# To make results reproducible and to limit TensorFlow's GPU memory usage.
# https://github.com/fchollet/keras/issues/2280

np.random.seed(123)
tf.set_random_seed(123)
config = tf.ConfigProto(inter_op_parallelism_threads=1)
config.gpu_options.per_process_gpu_memory_fraction = 0.1 # in my case this uses about 1 GB of GPU memory
set_session(tf.Session(config=config))

In [65]:
# Initialising the ANN
classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))

# Adding the second hidden layer
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))

# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

# Compiling the ANN
# These are the default Adam settings, written out explicitly
opt_adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
classifier.compile(optimizer = opt_adam, loss = 'binary_crossentropy', metrics = ['accuracy'])
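
To double-check the architecture, classifier.summary() lists the layer shapes; with 11 inputs, two hidden layers of 6 units and one sigmoid output, the counts work out to (11+1)*6 = 72, (6+1)*6 = 42 and (6+1)*1 = 7 weights, i.e. 121 trainable parameters in total.

# Optional: print layer output shapes and parameter counts
classifier.summary()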

In [66]:
# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 100, epochs = 100, verbose=True)


Epoch 1/100
8000/8000 [==============================] - 0s - loss: 0.6727 - acc: 0.7935     
Epoch 2/100
8000/8000 [==============================] - 0s - loss: 0.5633 - acc: 0.7960     
Epoch 3/100
8000/8000 [==============================] - 0s - loss: 0.4571 - acc: 0.7960     
Epoch 4/100
8000/8000 [==============================] - 0s - loss: 0.4407 - acc: 0.7960     
Epoch 5/100
8000/8000 [==============================] - 0s - loss: 0.4361 - acc: 0.7960     
Epoch 6/100
8000/8000 [==============================] - 0s - loss: 0.4331 - acc: 0.7960     
Epoch 7/100
8000/8000 [==============================] - 0s - loss: 0.4311 - acc: 0.7960     
Epoch 8/100
8000/8000 [==============================] - 0s - loss: 0.4296 - acc: 0.7960     
Epoch 9/100
8000/8000 [==============================] - 0s - loss: 0.4287 - acc: 0.7960     
Epoch 10/100
8000/8000 [==============================] - 0s - loss: 0.4275 - acc: 0.7960     
Epoch 11/100
8000/8000 [==============================] - 0s - loss: 0.4267 - acc: 0.7960     
Epoch 12/100
8000/8000 [==============================] - 0s - loss: 0.4256 - acc: 0.7960     
Epoch 13/100
8000/8000 [==============================] - 0s - loss: 0.4250 - acc: 0.7960     
Epoch 14/100
8000/8000 [==============================] - 0s - loss: 0.4233 - acc: 0.7960     
Epoch 15/100
8000/8000 [==============================] - 0s - loss: 0.4226 - acc: 0.7960     
Epoch 16/100
8000/8000 [==============================] - 0s - loss: 0.4213 - acc: 0.7960     
Epoch 17/100
8000/8000 [==============================] - 0s - loss: 0.4205 - acc: 0.7960     
Epoch 18/100
8000/8000 [==============================] - 0s - loss: 0.4199 - acc: 0.7960     
Epoch 19/100
8000/8000 [==============================] - 0s - loss: 0.4191 - acc: 0.8106     
Epoch 20/100
8000/8000 [==============================] - 0s - loss: 0.4186 - acc: 0.8195     
Epoch 21/100
8000/8000 [==============================] - 0s - loss: 0.4180 - acc: 0.8205     
Epoch 22/100
8000/8000 [==============================] - 0s - loss: 0.4173 - acc: 0.8229     
Epoch 23/100
8000/8000 [==============================] - 0s - loss: 0.4171 - acc: 0.8245     
Epoch 24/100
8000/8000 [==============================] - 0s - loss: 0.4162 - acc: 0.8239     
Epoch 25/100
8000/8000 [==============================] - 0s - loss: 0.4156 - acc: 0.8254     
Epoch 26/100
8000/8000 [==============================] - 0s - loss: 0.4150 - acc: 0.8260     
Epoch 27/100
8000/8000 [==============================] - 0s - loss: 0.4146 - acc: 0.8262     
Epoch 28/100
8000/8000 [==============================] - 0s - loss: 0.4142 - acc: 0.8287     
Epoch 29/100
8000/8000 [==============================] - 0s - loss: 0.4137 - acc: 0.8281     
Epoch 30/100
8000/8000 [==============================] - 0s - loss: 0.4134 - acc: 0.8302     
Epoch 31/100
8000/8000 [==============================] - 0s - loss: 0.4129 - acc: 0.8299     
Epoch 32/100
8000/8000 [==============================] - 0s - loss: 0.4125 - acc: 0.8305     
Epoch 33/100
8000/8000 [==============================] - 0s - loss: 0.4120 - acc: 0.8310     
Epoch 34/100
8000/8000 [==============================] - 0s - loss: 0.4117 - acc: 0.8306     
Epoch 35/100
8000/8000 [==============================] - 0s - loss: 0.4114 - acc: 0.8319     
Epoch 36/100
8000/8000 [==============================] - 0s - loss: 0.4109 - acc: 0.8325     
Epoch 37/100
8000/8000 [==============================] - 0s - loss: 0.4105 - acc: 0.8327     
Epoch 38/100
8000/8000 [==============================] - 0s - loss: 0.4101 - acc: 0.8339     
Epoch 39/100
8000/8000 [==============================] - 0s - loss: 0.4098 - acc: 0.8339     
Epoch 40/100
8000/8000 [==============================] - 0s - loss: 0.4094 - acc: 0.8334     
Epoch 41/100
8000/8000 [==============================] - 0s - loss: 0.4093 - acc: 0.8339     
Epoch 42/100
8000/8000 [==============================] - 0s - loss: 0.4089 - acc: 0.8340     
Epoch 43/100
8000/8000 [==============================] - 0s - loss: 0.4084 - acc: 0.8350     
Epoch 44/100
8000/8000 [==============================] - 0s - loss: 0.4082 - acc: 0.8349     
Epoch 45/100
8000/8000 [==============================] - 0s - loss: 0.4077 - acc: 0.8349     
Epoch 46/100
8000/8000 [==============================] - 0s - loss: 0.4075 - acc: 0.8347     
Epoch 47/100
8000/8000 [==============================] - 0s - loss: 0.4076 - acc: 0.8354     
Epoch 48/100
8000/8000 [==============================] - 0s - loss: 0.4071 - acc: 0.8349     
Epoch 49/100
8000/8000 [==============================] - 0s - loss: 0.4068 - acc: 0.8345     
Epoch 50/100
8000/8000 [==============================] - 0s - loss: 0.4067 - acc: 0.8337     
Epoch 51/100
8000/8000 [==============================] - 0s - loss: 0.4065 - acc: 0.8346     
Epoch 52/100
8000/8000 [==============================] - 0s - loss: 0.4060 - acc: 0.8342     
Epoch 53/100
8000/8000 [==============================] - 0s - loss: 0.4059 - acc: 0.8349     
Epoch 54/100
8000/8000 [==============================] - 0s - loss: 0.4056 - acc: 0.8347     
Epoch 55/100
8000/8000 [==============================] - 0s - loss: 0.4055 - acc: 0.8349     
Epoch 56/100
8000/8000 [==============================] - 0s - loss: 0.4052 - acc: 0.8345     
Epoch 57/100
8000/8000 [==============================] - 0s - loss: 0.4050 - acc: 0.8346     
Epoch 58/100
8000/8000 [==============================] - 0s - loss: 0.4049 - acc: 0.8352     
Epoch 59/100
8000/8000 [==============================] - 0s - loss: 0.4044 - acc: 0.8351     
Epoch 60/100
8000/8000 [==============================] - 0s - loss: 0.4047 - acc: 0.8346     
Epoch 61/100
8000/8000 [==============================] - 0s - loss: 0.4043 - acc: 0.8351     
Epoch 62/100
8000/8000 [==============================] - 0s - loss: 0.4041 - acc: 0.8346     
Epoch 63/100
8000/8000 [==============================] - 0s - loss: 0.4039 - acc: 0.8351     
Epoch 64/100
8000/8000 [==============================] - 0s - loss: 0.4041 - acc: 0.8337     
Epoch 65/100
8000/8000 [==============================] - 0s - loss: 0.4036 - acc: 0.8349     
Epoch 66/100
8000/8000 [==============================] - 0s - loss: 0.4035 - acc: 0.8349     
Epoch 67/100
8000/8000 [==============================] - 0s - loss: 0.4032 - acc: 0.8352     
Epoch 68/100
8000/8000 [==============================] - 0s - loss: 0.4033 - acc: 0.8359     
Epoch 69/100
8000/8000 [==============================] - 0s - loss: 0.4030 - acc: 0.8345     
Epoch 70/100
8000/8000 [==============================] - 0s - loss: 0.4030 - acc: 0.8356     
Epoch 71/100
8000/8000 [==============================] - 0s - loss: 0.4026 - acc: 0.8344     
Epoch 72/100
8000/8000 [==============================] - 0s - loss: 0.4029 - acc: 0.8356     
Epoch 73/100
8000/8000 [==============================] - 0s - loss: 0.4027 - acc: 0.8350     
Epoch 74/100
8000/8000 [==============================] - 0s - loss: 0.4024 - acc: 0.8356     
Epoch 75/100
8000/8000 [==============================] - 0s - loss: 0.4023 - acc: 0.8351     
Epoch 76/100
8000/8000 [==============================] - 0s - loss: 0.4019 - acc: 0.8346     
Epoch 77/100
8000/8000 [==============================] - 0s - loss: 0.4019 - acc: 0.8344     
Epoch 78/100
8000/8000 [==============================] - 0s - loss: 0.4020 - acc: 0.8357     
Epoch 79/100
8000/8000 [==============================] - 0s - loss: 0.4021 - acc: 0.8352     
Epoch 80/100
8000/8000 [==============================] - 0s - loss: 0.4018 - acc: 0.8355     
Epoch 81/100
8000/8000 [==============================] - 0s - loss: 0.4015 - acc: 0.8360     
Epoch 82/100
8000/8000 [==============================] - 0s - loss: 0.4014 - acc: 0.8347     
Epoch 83/100
8000/8000 [==============================] - 0s - loss: 0.4015 - acc: 0.8351     
Epoch 84/100
8000/8000 [==============================] - 0s - loss: 0.4014 - acc: 0.8355     
Epoch 85/100
8000/8000 [==============================] - 0s - loss: 0.4012 - acc: 0.8360     
Epoch 86/100
8000/8000 [==============================] - 0s - loss: 0.4012 - acc: 0.8356     
Epoch 87/100
8000/8000 [==============================] - 0s - loss: 0.4010 - acc: 0.8360     
Epoch 88/100
8000/8000 [==============================] - 0s - loss: 0.4010 - acc: 0.8351     
Epoch 89/100
8000/8000 [==============================] - 0s - loss: 0.4010 - acc: 0.8351     
Epoch 90/100
8000/8000 [==============================] - 0s - loss: 0.4008 - acc: 0.8356     
Epoch 91/100
8000/8000 [==============================] - 0s - loss: 0.4006 - acc: 0.8359     
Epoch 92/100
8000/8000 [==============================] - 0s - loss: 0.4006 - acc: 0.8359     
Epoch 93/100
8000/8000 [==============================] - 0s - loss: 0.4004 - acc: 0.8352     
Epoch 94/100
8000/8000 [==============================] - 0s - loss: 0.4005 - acc: 0.8361     
Epoch 95/100
8000/8000 [==============================] - 0s - loss: 0.4005 - acc: 0.8364     
Epoch 96/100
8000/8000 [==============================] - 0s - loss: 0.4005 - acc: 0.8350     
Epoch 97/100
8000/8000 [==============================] - 0s - loss: 0.4005 - acc: 0.8349     
Epoch 98/100
8000/8000 [==============================] - 0s - loss: 0.4001 - acc: 0.8350     
Epoch 99/100
8000/8000 [==============================] - 0s - loss: 0.4000 - acc: 0.8356     
Epoch 100/100
8000/8000 [==============================] - 0s - loss: 0.4001 - acc: 0.8369     
Out[66]:
<keras.callbacks.History at 0x7f0cda198128>

Part 3 - Making predictions and evaluating the model


In [67]:
# Predicting the Test set results
y_pred_pro = classifier.predict(X_test)
y_pred = (y_pred_pro > 0.5)

In [68]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
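
The four entries of cm can be unpacked to see where the errors are (rows are the true classes, columns the predictions):

# tn/fp: customers who stayed, predicted stay/exit; fn/tp: customers who exited
tn, fp, fn, tp = cm.ravel()
print(cm)
print('accuracy from the confusion matrix:', (tn + tp) / (tn + fp + fn + tp))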

In [69]:
from sklearn.metrics import accuracy_score, roc_auc_score

accuracy_score(y_test, y_pred)


Out[69]:
0.84299999999999997

In [70]:
roc_auc_score(y_test, y_pred_pro, average='macro')


Out[70]:
0.79844421223731565

In [71]:
roc_auc_score(y_test, y_pred_pro, average='micro')


Out[71]:
0.79844421223731565
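
For a binary target the macro and micro averages coincide, as the two identical values above show. To see where this ~0.80 AUC comes from, the ROC curve can be plotted from the predicted probabilities (a quick sketch using the matplotlib import at the top of the notebook):

from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_test, y_pred_pro.ravel())
plt.plot(fpr, tpr, label='ANN')
plt.plot([0, 1], [0, 1], '--', label='chance')  # diagonal = random guessing
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()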

Using the model on a new customer


In [33]:
# Feature order must match d_X: CreditScore, Age, Tenure, Balance, NumOfProducts,
# HasCrCard, IsActiveMember, EstimatedSalary, Geography_Germany, Geography_Spain, Gender_Male
new_customer = np.array([[600, 40, 3, 6000, 2, 1, 1, 50000, 0, 0, 1]], dtype='float')
new_customer = sc.transform(new_customer)

In [34]:
new_customer


Out[34]:
array([[-0.52111599,  0.10961719, -0.68538967, -1.11983631,  0.8095029 ,
         0.64259497,  0.9687384 , -0.87203322, -0.5698444 , -0.57369368,
         0.91601335]])

In [35]:
new_customer_pre = classifier.predict(new_customer)

In [69]:
new_customer_pre[0] > 0.5


Out[69]:
array([False], dtype=bool)
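
To avoid getting the column order wrong when hand-crafting such a row, the same hypothetical customer can also be built as a one-row DataFrame using the d_X column names (a sketch; the values are identical to the array above):

# Same hypothetical customer, with named columns taken from d_X
new_customer_df = pd.DataFrame([{
    'CreditScore': 600, 'Age': 40, 'Tenure': 3, 'Balance': 6000,
    'NumOfProducts': 2, 'HasCrCard': 1, 'IsActiveMember': 1,
    'EstimatedSalary': 50000, 'Geography_Germany': 0,
    'Geography_Spain': 0, 'Gender_Male': 1}], columns=d_X.columns)
classifier.predict(sc.transform(new_customer_df))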

Using cross-validation to evaluate the model


In [38]:
# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dropout(0.3))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    # classifier.add(Dropout(0.3))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier, batch_size = 100, epochs = 100, verbose=0)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = 1)
mean = accuracies.mean()
variance = accuracies.std()

In [39]:
mean


Out[39]:
0.83437499627470968

In [40]:
variance


Out[40]:
0.010250767245527004

Parameter Tuning


In [72]:
# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier, verbose=0)


parameters = {'batch_size': [25, 32, 50],
              'epochs': [50, 100],
              'optimizer': ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10, n_jobs=1, verbose=10 )
grid_search = grid_search.fit(X_train, y_train)

print(grid_search.best_params_)
print(grid_search.best_score_)


{'batch_size': 5, 'epochs': 500, 'optimizer': 'adam'}
0.85575
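
Beyond the single best combination, the full grid can be inspected through grid_search.cv_results_ (a quick sketch):

# Rank all tried parameter combinations by mean cross-validated accuracy
results = pd.DataFrame(grid_search.cv_results_)
results[['params', 'mean_test_score', 'std_test_score']].sort_values(
    'mean_test_score', ascending=False).head()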


