In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

from sklearn.model_selection import train_test_split

In [3]:
dataset = pd.read_csv('Churn_Modelling.csv')
d_X = dataset.iloc[:, 3:13]
d_y = dataset.iloc[:, 13]
d_X = pd.get_dummies(d_X)
d_X.drop(['Geography_France', 'Gender_Female'], axis=1, inplace=True)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(d_X, d_y, test_size = 0.2, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision

In [5]:
EPOCH = 100
BATCH_SIZE = 10
LR = 0.001

In [35]:
torch_net = torch.nn.Sequential(
    torch.nn.Linear(11, 6),
    torch.nn.ReLU(),
    torch.nn.Linear(6, 2),
    torch.nn.ReLU(),
)

In [36]:
t_x_train = torch.from_numpy(X_train).float()
t_y_train = torch.from_numpy(y_train.as_matrix()).long()
t_x_test = torch.from_numpy(X_test).float()
t_y_test = torch.from_numpy(y_test.as_matrix()).long()

torch_dataset = Data.TensorDataset(data_tensor=t_x_train, target_tensor=t_y_train)
train_loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE, 
    shuffle=True)

In [37]:
# To use GPU
v_x_test = Variable(t_x_test).cuda()
t_y_test = t_y_test.cuda()

In [38]:
torch_net.cuda()


Out[38]:
Sequential (
  (0): Linear (11 -> 6)
  (1): ReLU ()
  (2): Linear (6 -> 2)
  (3): ReLU ()
)

In [39]:
optimizer = torch.optim.Adam(torch_net.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

losses_his = []

In [40]:
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):

        # !!!!!!!! Change in here !!!!!!!!! #
        b_x = Variable(x).cuda()    # Tensor on GPU
        b_y = Variable(y).cuda()    # Tensor on GPU

        output = torch_net(b_x)
        loss = loss_func(output, b_y)
        losses_his.append(loss.data[0])
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0 and step % 500 == 0:
            test_output = torch_net(v_x_test)

            # !!!!!!!! Change in here !!!!!!!!! #
            pred_y = torch.max(F.softmax(test_output), 1)[1].cuda().data.squeeze()  # move the computation in GPU

            accuracy = sum(pred_y == t_y_test) / t_y_test.size(0)
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.4f' % accuracy)


Epoch:  0 | train loss: 1.0014 | test accuracy: 0.2025
Epoch:  0 | train loss: 0.6102 | test accuracy: 0.7980
Epoch:  10 | train loss: 0.6584 | test accuracy: 0.8635
Epoch:  10 | train loss: 0.4041 | test accuracy: 0.8650
Epoch:  20 | train loss: 0.3159 | test accuracy: 0.8630
Epoch:  20 | train loss: 0.2007 | test accuracy: 0.8600
Epoch:  30 | train loss: 0.1843 | test accuracy: 0.8585
Epoch:  30 | train loss: 0.3283 | test accuracy: 0.8615
Epoch:  40 | train loss: 0.6404 | test accuracy: 0.8625
Epoch:  40 | train loss: 0.6092 | test accuracy: 0.8615
Epoch:  50 | train loss: 0.1515 | test accuracy: 0.8590
Epoch:  50 | train loss: 0.3402 | test accuracy: 0.8575
Epoch:  60 | train loss: 0.1847 | test accuracy: 0.8590
Epoch:  60 | train loss: 0.1209 | test accuracy: 0.8610
Epoch:  70 | train loss: 0.4144 | test accuracy: 0.8590
Epoch:  70 | train loss: 0.3169 | test accuracy: 0.8595
Epoch:  80 | train loss: 0.4479 | test accuracy: 0.8610
Epoch:  80 | train loss: 0.4547 | test accuracy: 0.8590
Epoch:  90 | train loss: 0.3526 | test accuracy: 0.8605
Epoch:  90 | train loss: 0.0960 | test accuracy: 0.8610

In [20]:
test_output = torch_net(v_x_test)

In [29]:
test_output_pro = F.softmax(test_output)[:,1].data.cpu().numpy()

In [32]:
from sklearn.metrics import accuracy_score, roc_auc_score

accuracy_score(t_y_test.cpu().numpy(), test_output_pro>0.5)


Out[32]:
0.85999999999999999

In [33]:
roc_auc_score(y_test, test_output_pro, average='macro')


Out[33]:
0.86801346801346813

Use the model as if we have a new customer


In [41]:
new_customer = np.array([[600, 40, 3, 6000, 2, 1, 1, 50000,0, 0, 1]], dtype='float')
new_customer = sc.transform(new_customer)

In [42]:
new_customer = torch.from_numpy(new_customer).float()
new_customer = Variable(new_customer).cuda()

In [43]:
new_customer_pre = torch_net(new_customer)

In [50]:
F.softmax(new_customer_pre).data.cpu().numpy()


Out[50]:
array([[ 0.98490518,  0.01509479]], dtype=float32)

In [ ]:


In [6]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
model = XGBClassifier(max_depth=6, learning_rate=0.12, n_estimators=14, objective="binary:logistic", subsample=0.6, seed=0)
scores = cross_val_score(model, X_train, y_train, cv=10, scoring="accuracy")
print(scores.mean())


0.863622726559

In [7]:
print(np.std(scores))


0.00783351561497

In [8]:
scores


Out[8]:
array([ 0.87640449,  0.8576779 ,  0.87375   ,  0.87      ,  0.86125   ,
        0.8575    ,  0.8675    ,  0.85625   ,  0.85106383,  0.86483104])

In [ ]: