In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.nn import init
from torch.utils.data import Dataset, DataLoader

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))
# load data
print('loading train')
train_df = pd.read_csv('../input/train.csv')
print('loading test')
test_df = pd.read_csv('../input/test.csv')
print(train_df)
print(test_df)


['train.csv', 'sample_submission.csv', 'test.csv']
loading train
loading test
             ID_code  target    var_0   ...     var_197  var_198  var_199
0            train_0       0   8.9255   ...      8.5635  12.7803  -1.0914
1            train_1       0  11.5006   ...      8.7889  18.3560   1.9518
2            train_2       0   8.6093   ...      8.2675  14.7222   0.3965
3            train_3       0  11.0604   ...     10.2922  17.9697  -8.9996
4            train_4       0   9.8369   ...      9.5031  17.9974  -8.8104
...              ...     ...      ...   ...         ...      ...      ...

[200000 rows x 202 columns]
            ID_code    var_0    var_1   ...     var_197  var_198  var_199
0            test_0  11.0656   7.7798   ...     10.7200  15.4722  -8.7197
1            test_1   8.5304   1.2543   ...      9.8714  19.1293 -20.9760
2            test_2   5.4827 -10.3581   ...      7.0618  19.8956 -23.1794
3            test_3   8.5374  -1.3222   ...      9.2295  13.0168  -4.2108
4            test_4  11.7058  -0.1327   ...      7.2882  13.9260  -9.1846
...             ...      ...      ...   ...         ...      ...      ...

[200000 rows x 201 columns]
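
# Note: the target is heavily imbalanced (roughly 10% positives in this
# competition), which is why the raw accuracies near 0.90 below are close to
# the all-zeros baseline; ROC AUC is the more informative metric. A quick check:
print(train_df.target.mean())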

In [2]:
train_drop_label = train_df.drop(['target', 'ID_code'], axis=1) # Features
test_drop_label = test_df.drop(['ID_code'], axis=1)
all_df = pd.concat([train_drop_label, test_drop_label], axis=0)
train_len = len(train_df)
test_len = len(test_df)
print(train_len, test_len)
scaler = preprocessing.StandardScaler()
all_scaled = pd.DataFrame(scaler.fit_transform(all_df), columns=all_df.columns)

# hold out 1% of the scaled training rows as a validation set
train_x, test_x, train_y, test_y = train_test_split(
    all_scaled[:train_len], train_df.target, test_size=0.01, shuffle=True)

validation_x_tensor = torch.Tensor(test_x.values)
train_x_tensor = torch.Tensor(train_x.values)
test_x_tensor = torch.Tensor(all_scaled[train_len:].values)


200000 200000
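
# Caveat: the StandardScaler above was fit on train and test together, which
# leaks test-set statistics into the features. A minimal sketch of the stricter
# alternative, fitting on the training rows only (variable names hypothetical):
scaler_train_only = preprocessing.StandardScaler()
train_scaled = pd.DataFrame(scaler_train_only.fit_transform(train_drop_label),
                            columns=train_drop_label.columns)
test_scaled = pd.DataFrame(scaler_train_only.transform(test_drop_label),
                           columns=test_drop_label.columns)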

In [3]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        
        self.model = nn.Sequential(
             nn.Linear(200, 60),
             nn.ReLU(),
             nn.BatchNorm1d(60),
             nn.Linear(60, 40),
             nn.ReLU(),
             nn.BatchNorm1d(40),
             nn.Linear(40, 20),
             nn.ReLU(),
             nn.BatchNorm1d(20),
             nn.Linear(20, 1),
             nn.Sigmoid(),
        )
        # Xavier-initialise the weights of every Linear layer in the stack
        for layer in self.model:
            if isinstance(layer, nn.Linear):
                init.xavier_uniform_(layer.weight)


    def forward(self, x):
        return self.model(x)
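
# Sanity-check sketch (not in the original run): a random 4x200 batch should
# come out as a 4x1 tensor of probabilities; BatchNorm1d needs batch size > 1.
_probe_net = Network()
_probe_out = _probe_net(torch.randn(4, 200))
assert _probe_out.shape == (4, 1)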
    
def train(train_loader, model, loss_fn, optimizer, scheduler):
    model.train(True)
    total_loss = 0

    for data, target in train_loader:
        data = data.cuda()
        target = target.cuda()
        optimizer.zero_grad()
        outputs = model(data)
        loss = loss_fn(outputs, target)
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    
    scheduler.step(total_loss)
    return total_loss

def evaluate(model, X=validation_x_tensor, mode='Train'):
    model.eval()
    with torch.no_grad():
        train_pred = model(train_x_tensor.cuda()).cpu().numpy().ravel()
        test_pred = model(X.cuda()).cpu().numpy().ravel()
    if mode == 'Train':
        return (roc_auc_score(train_y, train_pred),
                accuracy_score(train_y, train_pred.round(0)),
                roc_auc_score(test_y, test_pred),
                accuracy_score(test_y, test_pred.round(0)))
    else:
        return test_pred
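
# If pushing all ~198k training rows through the GPU in one call ever runs out
# of memory, a batched variant is easy; a sketch with a hypothetical chunk size:
def predict_in_batches(model, x, batch_size=8192):
    model.eval()
    preds = []
    with torch.no_grad():
        for i in range(0, x.shape[0], batch_size):
            preds.append(model(x[i:i + batch_size].cuda()).cpu())
    return torch.cat(preds).numpy().ravel()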
    

def fit(train_loader, model, loss_fn, optimizer, n_epochs, scheduler):

    train_losses = []
    train_accuracies = []
    train_rocs = []
    test_accuracies = []
    test_rocs = []

    for epoch in range(n_epochs):
        
        train_loss = train(train_loader, model, loss_fn, optimizer, scheduler)
        train_roc, train_accuracy, test_roc, test_accuracy = evaluate(model)
        train_accuracies.append(train_accuracy)
        train_losses.append(train_loss)
        train_rocs.append(train_roc)
        test_accuracies.append(test_accuracy)
        test_rocs.append(test_roc)
        
        print('Epoch: {}/{}, loss: {:.4f}'.format(epoch+1, n_epochs, train_loss))
        print('Train: roc: {:.4f}, Accuracy: {:.4f}'.format(train_roc, train_accuracy))
        print('Test:  roc: {:.4f}, Accuracy: {:.4f}'.format(test_roc, test_accuracy))

    return train_losses, train_accuracies, train_rocs, test_accuracies, test_rocs

class TrainDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.Tensor(x.values)
        self.y = torch.Tensor(y.values.reshape(-1, 1))

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]
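
# Aside: TrainDataset is equivalent to torch's built-in TensorDataset; a sketch
# of the same dataset built that way (train_set_alt is a hypothetical name):
from torch.utils.data import TensorDataset
train_set_alt = TensorDataset(torch.Tensor(train_x.values),
                              torch.Tensor(train_y.values.reshape(-1, 1)))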
    

batch_size = 1024
n_epochs = 10
learning_rate = 0.01

model = Network().cuda()
loss_fn = torch.nn.BCELoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.9)

train_set = TrainDataset(train_x, train_y)
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

train_losses, train_accuracies, train_rocs, test_accuracies, test_rocs = fit(train_loader, model, loss_fn, optimizer, n_epochs, scheduler)


Epoch: 1/10, loss: 67.9494
Train: roc: 0.6270, Accuracy: 0.8996
Test:  roc: 0.6117, Accuracy: 0.8925
Epoch: 2/10, loss: 57.3031
Train: roc: 0.7953, Accuracy: 0.9060
Test:  roc: 0.7838, Accuracy: 0.9030
Epoch: 3/10, loss: 48.9115
Train: roc: 0.8409, Accuracy: 0.9125
Test:  roc: 0.8462, Accuracy: 0.9110
Epoch: 4/10, loss: 46.5491
Train: roc: 0.8507, Accuracy: 0.9140
Test:  roc: 0.8605, Accuracy: 0.9100
Epoch: 5/10, loss: 45.7689
Train: roc: 0.8550, Accuracy: 0.9146
Test:  roc: 0.8661, Accuracy: 0.9115
Epoch: 6/10, loss: 45.3911
Train: roc: 0.8580, Accuracy: 0.9150
Test:  roc: 0.8687, Accuracy: 0.9105
Epoch: 7/10, loss: 45.0657
Train: roc: 0.8605, Accuracy: 0.9154
Test:  roc: 0.8725, Accuracy: 0.9100
Epoch: 8/10, loss: 44.8867
Train: roc: 0.8624, Accuracy: 0.9155
Test:  roc: 0.8731, Accuracy: 0.9110
Epoch: 9/10, loss: 44.6741
Train: roc: 0.8640, Accuracy: 0.9157
Test:  roc: 0.8742, Accuracy: 0.9085
Epoch: 10/10, loss: 44.4922
Train: roc: 0.8655, Accuracy: 0.9159
Test:  roc: 0.8752, Accuracy: 0.9080
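
# A common, numerically safer variant (an alternative, not what this notebook
# does) is to drop the final nn.Sigmoid() from the model and train on raw
# logits with BCEWithLogitsLoss, applying torch.sigmoid() only at prediction:
loss_fn_logits = torch.nn.BCEWithLogitsLoss()
logits = torch.randn(8, 1)                      # stand-in for a logits head
targets = torch.randint(0, 2, (8, 1)).float()
print(loss_fn_logits(logits, targets))          # scalar loss on raw logits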

In [4]:
def show_figure(values, title):
    x = np.arange(len(values))
    y = np.array(values)
    plt.figure()
    plt.plot(x, y, c='b')
    plt.title(title)
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()

show_figure(train_losses, 'Loss')
show_figure(train_rocs, 'Train roc')
show_figure(test_rocs, 'Test roc')
show_figure(train_accuracies, 'Train accuracy')
show_figure(test_accuracies, 'Test accuracy')

(Output: five line plots: Loss, Train roc, Test roc, Train accuracy, Test accuracy.)

In [5]:
model.eval()

res = evaluate(model, X=test_x_tensor, mode='Test')  # evaluate() already runs under no_grad
dataframe = pd.DataFrame({'ID_code': test_df.ID_code, 'target': res})
dataframe.to_csv("result.csv", index=False, sep=',')
print(dataframe)


            ID_code    target
0            test_0  0.176798
1            test_1  0.315358
2            test_2  0.072592
3            test_3  0.149532
4            test_4  0.051118
...             ...       ...

[200000 rows x 2 columns]
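
# Sanity-check sketch (assumed columns, not verified in the original run): the
# submission should match sample_submission.csv's schema before uploading.
sample = pd.read_csv('../input/sample_submission.csv')
assert list(sample.columns) == list(dataframe.columns)
assert len(sample) == len(dataframe)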