In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
from sklearn.model_selection import train_test_split
In [3]:
dataset = pd.read_csv('Churn_Modelling.csv')
d_X = dataset.iloc[:, 3:13]   # drop RowNumber, CustomerId, Surname
d_y = dataset.iloc[:, 13]     # Exited (churn) label
d_X = pd.get_dummies(d_X)     # one-hot encode Geography and Gender
# Drop one dummy per categorical to avoid the dummy-variable trap
d_X.drop(['Geography_France', 'Gender_Female'], axis=1, inplace=True)
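A quick sanity check on the encoded feature order is worthwhile here, since the hand-built customer row near the end of the notebook must follow the same column order. A minimal sketch, assuming the standard Churn_Modelling.csv columns:
In [ ]:
# Expected order after get_dummies and the drops: CreditScore, Age, Tenure,
# Balance, NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary,
# Geography_Germany, Geography_Spain, Gender_Male
print(list(d_X.columns))
print(d_X.shape)   # (10000, 11) for the standard dataset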
In [4]:
X_train, X_test, y_train, y_test = train_test_split(d_X, d_y, test_size=0.2, random_state=0)
# Feature Scaling: fit on the training set only, then apply the same transform to the test set
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
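The fitted scaler is reused near the end of the notebook to transform a hand-built customer row; if the model were ever scored outside this notebook, the scaler could be persisted as well. A minimal sketch using joblib (the filename is hypothetical, not part of the original workflow):
In [ ]:
import joblib
joblib.dump(sc, 'churn_scaler.pkl')     # hypothetical filename
# sc = joblib.load('churn_scaler.pkl')  # reload before scaling new customers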
In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
In [5]:
EPOCH = 100        # training epochs
BATCH_SIZE = 10
LR = 0.001         # Adam learning rate
In [35]:
torch_net = torch.nn.Sequential(
    torch.nn.Linear(11, 6),
    torch.nn.ReLU(),
    torch.nn.Linear(6, 2),   # raw logits out; no final ReLU, since CrossEntropyLoss applies log-softmax itself
)
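A forward pass on a dummy batch confirms the layer sizes (11 scaled features in, 2 class scores out) before anything is moved to the GPU:
In [ ]:
with torch.no_grad():
    dummy = torch.randn(4, 11)       # batch of 4 fake customers
    print(torch_net(dummy).shape)    # expected: torch.Size([4, 2])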
In [36]:
t_x_train = torch.from_numpy(X_train).float()
t_y_train = torch.from_numpy(y_train.values).long()   # .as_matrix() was removed from pandas; use .values
t_x_test = torch.from_numpy(X_test).float()
t_y_test = torch.from_numpy(y_test.values).long()
torch_dataset = Data.TensorDataset(t_x_train, t_y_train)   # positional args since PyTorch 0.4
train_loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True)
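Pulling a single batch from the loader is a cheap sanity check on the shapes before training:
In [ ]:
xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)   # expected: torch.Size([10, 11]) torch.Size([10])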
In [37]:
# Move the fixed test tensors to the GPU once
v_x_test = t_x_test.cuda()
t_y_test = t_y_test.cuda()
In [38]:
torch_net.cuda()
In [39]:
optimizer = torch.optim.Adam(torch_net.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()   # expects raw logits and integer class labels
losses_his = []                     # per-step training loss history
In [40]:
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        b_x = x.cuda()                    # batch features on the GPU
        b_y = y.cuda()                    # batch labels on the GPU
        output = torch_net(b_x)
        loss = loss_func(output, b_y)
        losses_his.append(loss.item())    # .data[0] fails on 0-dim tensors in PyTorch >= 0.4
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0 and step % 500 == 0:
            with torch.no_grad():
                test_output = torch_net(v_x_test)
                pred_y = torch.max(test_output, 1)[1]   # argmax over class scores; softmax is unnecessary here
                accuracy = (pred_y == t_y_test).float().mean().item()
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.4f' % accuracy)
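matplotlib was imported at the top but never used; plotting the recorded per-step losses shows whether training converged:
In [ ]:
plt.plot(losses_his)
plt.xlabel('step')
plt.ylabel('cross-entropy loss')
plt.show()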
In [20]:
with torch.no_grad():
    test_output = torch_net(v_x_test)
In [29]:
test_output_pro = F.softmax(test_output, dim=1)[:, 1].cpu().numpy()   # P(churn) for each test customer
In [32]:
from sklearn.metrics import accuracy_score, roc_auc_score
accuracy_score(t_y_test.cpu().numpy(), test_output_pro > 0.5)   # threshold at 0.5
In [33]:
roc_auc_score(y_test, test_output_pro, average='macro')
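A confusion matrix at the 0.5 threshold breaks the accuracy down into the two error types (false alarms vs. missed churners):
In [ ]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, test_output_pro > 0.5))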
In [41]:
# Same column order as d_X after encoding: CreditScore, Age, Tenure, Balance,
# NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary,
# Geography_Germany, Geography_Spain, Gender_Male
new_customer = np.array([[600, 40, 3, 6000, 2, 1, 1, 50000, 0, 0, 1]], dtype='float')
new_customer = sc.transform(new_customer)
In [42]:
new_customer = torch.from_numpy(new_customer).float().cuda()
In [43]:
with torch.no_grad():
    new_customer_pre = torch_net(new_customer)
In [50]:
F.softmax(new_customer_pre, dim=1).cpu().numpy()   # [[P(stay), P(churn)]]
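Column 0 is the probability of staying and column 1 the probability of exiting, since d_y was taken from the Exited column; a readable printout, assuming that layout:
In [ ]:
probs = F.softmax(new_customer_pre, dim=1).cpu().numpy()[0]
print('P(stay) = %.4f | P(churn) = %.4f' % (probs[0], probs[1]))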
In [6]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
model = XGBClassifier(max_depth=6, learning_rate=0.12, n_estimators=14,
                      objective="binary:logistic", subsample=0.6, random_state=0)   # 'seed' is deprecated in favor of random_state
scores = cross_val_score(model, X_train, y_train, cv=10, scoring="accuracy")
print(scores.mean())   # mean 10-fold CV accuracy on the training split
In [7]:
print(np.std(scores))
In [8]:
scores
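Cross-validation only scores the training split; fitting on the full training set and scoring the held-out test set makes the comparison with the network direct (accuracy_score and roc_auc_score were imported earlier):
In [ ]:
model.fit(X_train, y_train)
print(accuracy_score(y_test, model.predict(X_test)))
print(roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))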