In [1]:
import pandas as pd
import numpy as np
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
pd.set_option("display.max_rows",40)
%matplotlib inline


Using TensorFlow backend.

In [2]:
class dataset:
    kdd_train_2labels = pd.read_pickle("dataset/kdd_train_2labels.pkl")
    #kdd_train_2labels_y = pd.read_pickle("dataset/kdd_train_2labels_y.pkl")
    
    kdd_test_2labels = pd.read_pickle("dataset/kdd_test_2labels.pkl")
    #kdd_test_2labels_y = pd.read_pickle("dataset/kdd_test_2labels_y.pkl")

In [3]:
from sklearn.preprocessing import LabelEncoder

le_2labels = LabelEncoder()
#dataset.y_train_2labels = le_2labels.fit_transform(dataset.kdd_train_2labels_y)
#dataset.y_test_2labels = le_2labels.transform(dataset.kdd_test_2labels_y)

In [4]:
from itertools import product
from sklearn.model_selection import train_test_split

class preprocessing:
    x_train = dataset.kdd_train_2labels.iloc[:,:-2].values
    y_train = dataset.kdd_train_2labels.iloc[:,-2:].values #np.array(dataset.y_train_2labels)

    x_test, y_test = (dataset.kdd_test_2labels.iloc[:,:-2].values, 
                      dataset.kdd_test_2labels.iloc[:,-2:].values #np.array(dataset.y_test_2labels)
                     )

In [5]:
from collections import namedtuple
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import regularizers

class Train:
    score = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score'])
    #model_detail = namedtuple("model_detail", ['epoch', 'no_of_features','hidden_layers', 'model'])
    scores = []
    predictions = {}
    #models = []
    def execute(x_train, x_test, 
                y_train, y_test, 
                input_dim, no_of_features, hidden_layers,
                epochs = 5, keep_prob = 0.9):
        
        print("Training for no_of_features: {}, hidden_layer: {}".format(no_of_features, hidden_layers
                                                                        ))
        model = Sequential()
        model.add(Dense(no_of_features, input_dim=input_dim, activation='relu'))
        model.add(Dropout(keep_prob))
        #model.add(BatchNormalization())
        
        for i in range(hidden_layers - 1):
            model.add(Dense(no_of_features, activation='relu'))
            model.add(Dropout(keep_prob))
            #model.add(BatchNormalization())

        
        model.add(Dense(2, activation='softmax'))
        
        optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-04, decay=0.1)
        model.compile(loss='mean_squared_error',
                      optimizer=optimizer,
                      metrics=['accuracy'])

        x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=.1)
        
        model.fit(x_train, y_train,
                  validation_data=(x_test, y_test),
                  epochs=epochs,
                  batch_size=128,
                  verbose = 1)
        
        curr_score_valid = model.evaluate(x_valid, y_valid) #, batch_size=128)
        curr_score_test = model.evaluate(x_test, y_test) #, batch_size=128)
        pred_value = model.predict(x_test)
        
        print("\n Train Accuracy: {}, Test Accuracy: {}".format(curr_score_valid[-1], curr_score_test[-1])  )
        
        Train.scores.append(Train.score(epochs,no_of_features,hidden_layers,curr_score_valid[1], curr_score_test[1]))
        #Train.models.append(Train.model_detail(epochs,no_of_features,hidden_layers,model))
        
        y_pred = pred_value
        #y_pred = pred_value[:,-1]
        #y_pred[y_pred >= pred_value[:,-1].mean()] = 1
        #y_pred[y_pred < pred_value[:,-1].mean()] = 0
        
        curr_pred = pd.DataFrame({"Attack_prob":y_pred[:,-2], "Normal_prob":y_pred[:,-1]})
        Train.predictions.update({"{}_{}_{}".format(epochs,f,h):curr_pred})
        Train.model = model

In [6]:
#features_arr = [4, 8, 16, 32, 64, 128, 256, 1024]
#hidden_layers_arr = [2, 4, 6, 50, 100]

features_arr = [2, 4, 8, 16, 32]
hidden_layers_arr = [2, 6, 10]


for f, h in product(features_arr, hidden_layers_arr):
    Train.execute(preprocessing.x_train, preprocessing.x_test, 
                  preprocessing.y_train, preprocessing.y_test, 
                 122, f, h)


Training for no_of_features: 2, hidden_layer: 2
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 1s - loss: 0.2538 - acc: 0.5349 - val_loss: 0.5448 - val_acc: 0.4308
Epoch 2/5
113375/113375 [==============================] - 1s - loss: 0.2537 - acc: 0.5349 - val_loss: 0.5441 - val_acc: 0.4308
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.2535 - acc: 0.5349 - val_loss: 0.5434 - val_acc: 0.4308
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.2532 - acc: 0.5349 - val_loss: 0.5432 - val_acc: 0.4308
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.2535 - acc: 0.5349 - val_loss: 0.5431 - val_acc: 0.4308
20672/22544 [==========================>...] - ETA: 0s
 Train Accuracy: 0.5319098269566598, Test Accuracy: 0.43075762952448543
Training for no_of_features: 2, hidden_layer: 6
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 2s - loss: 0.2495 - acc: 0.5338 - val_loss: 0.2517 - val_acc: 0.4308
Epoch 2/5
113375/113375 [==============================] - 1s - loss: 0.2493 - acc: 0.5341 - val_loss: 0.2520 - val_acc: 0.4308
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.2493 - acc: 0.5341 - val_loss: 0.2521 - val_acc: 0.4308
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.2492 - acc: 0.5341 - val_loss: 0.2522 - val_acc: 0.4308
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.2492 - acc: 0.5341 - val_loss: 0.2523 - val_acc: 0.4308
21024/22544 [==========================>...] - ETA: 0s
 Train Accuracy: 0.5388156850577575, Test Accuracy: 0.43075762952448543
Training for no_of_features: 2, hidden_layer: 10
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 3s - loss: 0.2494 - acc: 0.5341 - val_loss: 0.2518 - val_acc: 0.4308
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.2493 - acc: 0.5342 - val_loss: 0.2521 - val_acc: 0.4308
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5342 - val_loss: 0.2522 - val_acc: 0.4308
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5342 - val_loss: 0.2523 - val_acc: 0.4308
Epoch 5/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5342 - val_loss: 0.2524 - val_acc: 0.4308
21216/22544 [===========================>..] - ETA: 0s
 Train Accuracy: 0.5378631528814097, Test Accuracy: 0.43075762952448543
Training for no_of_features: 4, hidden_layer: 2
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 1s - loss: 0.2535 - acc: 0.5372 - val_loss: 0.4815 - val_acc: 0.3704
Epoch 2/5
113375/113375 [==============================] - 1s - loss: 0.2533 - acc: 0.5365 - val_loss: 0.4820 - val_acc: 0.3699
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.2531 - acc: 0.5364 - val_loss: 0.4823 - val_acc: 0.3699
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.2529 - acc: 0.5371 - val_loss: 0.4825 - val_acc: 0.3696
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.2536 - acc: 0.5355 - val_loss: 0.4827 - val_acc: 0.3694
20512/22544 [==========================>...] - ETA: 0s
 Train Accuracy: 0.2563899031686942, Test Accuracy: 0.3694109297374024
Training for no_of_features: 4, hidden_layer: 6
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 2s - loss: 0.2495 - acc: 0.5347 - val_loss: 0.4262 - val_acc: 0.5467
Epoch 2/5
113375/113375 [==============================] - 1s - loss: 0.2493 - acc: 0.5351 - val_loss: 0.4262 - val_acc: 0.5465
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.2493 - acc: 0.5351 - val_loss: 0.4262 - val_acc: 0.5465
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.2492 - acc: 0.5351 - val_loss: 0.4262 - val_acc: 0.5464
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.2492 - acc: 0.5351 - val_loss: 0.4263 - val_acc: 0.5464
21920/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.4786474043309717, Test Accuracy: 0.5464425124201562
Training for no_of_features: 4, hidden_layer: 10
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 3s - loss: 0.2495 - acc: 0.5346 - val_loss: 0.2851 - val_acc: 0.4308
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.2493 - acc: 0.5348 - val_loss: 0.2853 - val_acc: 0.4308
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5348 - val_loss: 0.2854 - val_acc: 0.4308
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5348 - val_loss: 0.2855 - val_acc: 0.4308
Epoch 5/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5348 - val_loss: 0.2856 - val_acc: 0.4308
22272/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.5327829814492795, Test Accuracy: 0.43075762952448543
Training for no_of_features: 8, hidden_layer: 2
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 2s - loss: 0.2782 - acc: 0.5296 - val_loss: 0.7041 - val_acc: 0.2791
Epoch 2/5
113375/113375 [==============================] - 1s - loss: 0.2793 - acc: 0.5284 - val_loss: 0.7044 - val_acc: 0.2786
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.2787 - acc: 0.5283 - val_loss: 0.7049 - val_acc: 0.2781
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.2780 - acc: 0.5286 - val_loss: 0.7051 - val_acc: 0.2779
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.2786 - acc: 0.5285 - val_loss: 0.7053 - val_acc: 0.2777
21824/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.21773297348785522, Test Accuracy: 0.2777235628105039
Training for no_of_features: 8, hidden_layer: 6
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 2s - loss: 0.2498 - acc: 0.5345 - val_loss: 0.4045 - val_acc: 0.5687
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.2496 - acc: 0.5349 - val_loss: 0.4043 - val_acc: 0.5685
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.2495 - acc: 0.5347 - val_loss: 0.4042 - val_acc: 0.5684
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.2496 - acc: 0.5348 - val_loss: 0.4042 - val_acc: 0.5684
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.2495 - acc: 0.5348 - val_loss: 0.4041 - val_acc: 0.5683
22048/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.46713764089064896, Test Accuracy: 0.5683108587650816
Training for no_of_features: 8, hidden_layer: 10
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 3s - loss: 0.2495 - acc: 0.5337 - val_loss: 0.2601 - val_acc: 0.4308
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.2493 - acc: 0.5339 - val_loss: 0.2603 - val_acc: 0.4308
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.2493 - acc: 0.5338 - val_loss: 0.2605 - val_acc: 0.4308
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5338 - val_loss: 0.2606 - val_acc: 0.4308
Epoch 5/5
113375/113375 [==============================] - 2s - loss: 0.2492 - acc: 0.5339 - val_loss: 0.2607 - val_acc: 0.4308
22272/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.5411970153945385, Test Accuracy: 0.43075762952448543
Training for no_of_features: 16, hidden_layer: 2
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 2s - loss: 0.3239 - acc: 0.5115 - val_loss: 0.6708 - val_acc: 0.3054
Epoch 2/5
113375/113375 [==============================] - 1s - loss: 0.3185 - acc: 0.5118 - val_loss: 0.6763 - val_acc: 0.2987
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.3176 - acc: 0.5130 - val_loss: 0.6755 - val_acc: 0.2999
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.3169 - acc: 0.5119 - val_loss: 0.6793 - val_acc: 0.2956
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.3161 - acc: 0.5123 - val_loss: 0.6761 - val_acc: 0.2991
21536/22544 [===========================>..] - ETA: 0s
 Train Accuracy: 0.36537545642165425, Test Accuracy: 0.2991483321504613
Training for no_of_features: 16, hidden_layer: 6
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 3s - loss: 0.2571 - acc: 0.5335 - val_loss: 0.5589 - val_acc: 0.4308
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.2568 - acc: 0.5333 - val_loss: 0.5589 - val_acc: 0.4308
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.2571 - acc: 0.5330 - val_loss: 0.5589 - val_acc: 0.4308
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.2570 - acc: 0.5333 - val_loss: 0.5589 - val_acc: 0.4308
Epoch 5/5
113375/113375 [==============================] - 2s - loss: 0.2573 - acc: 0.5331 - val_loss: 0.5589 - val_acc: 0.4308
21472/22544 [===========================>..] - ETA: 0s
 Train Accuracy: 0.5387363073030603, Test Accuracy: 0.43075762952448543
Training for no_of_features: 16, hidden_layer: 10
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 4s - loss: 0.2503 - acc: 0.5328 - val_loss: 0.3608 - val_acc: 0.2257
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.2502 - acc: 0.5339 - val_loss: 0.3608 - val_acc: 0.2322
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.2498 - acc: 0.5342 - val_loss: 0.3607 - val_acc: 0.2352
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.2499 - acc: 0.5340 - val_loss: 0.3608 - val_acc: 0.2365
Epoch 5/5
113375/113375 [==============================] - 2s - loss: 0.2499 - acc: 0.5337 - val_loss: 0.3609 - val_acc: 0.2378
21472/22544 [===========================>..] - ETA: 0s
 Train Accuracy: 0.3305286553373865, Test Accuracy: 0.23784599006387508
Training for no_of_features: 32, hidden_layer: 2
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 2s - loss: 0.4121 - acc: 0.5102 - val_loss: 0.5852 - val_acc: 0.3400
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.4123 - acc: 0.5106 - val_loss: 0.5696 - val_acc: 0.3698
Epoch 3/5
113375/113375 [==============================] - 1s - loss: 0.4115 - acc: 0.5111 - val_loss: 0.5624 - val_acc: 0.3886
Epoch 4/5
113375/113375 [==============================] - 1s - loss: 0.4119 - acc: 0.5104 - val_loss: 0.5600 - val_acc: 0.3942
Epoch 5/5
113375/113375 [==============================] - 1s - loss: 0.4122 - acc: 0.5110 - val_loss: 0.5551 - val_acc: 0.4068
22336/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.2925861248006365, Test Accuracy: 0.40680447125621005
Training for no_of_features: 32, hidden_layer: 6
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 4s - loss: 0.3145 - acc: 0.5191 - val_loss: 0.5507 - val_acc: 0.3450
Epoch 2/5
113375/113375 [==============================] - 2s - loss: 0.3136 - acc: 0.5189 - val_loss: 0.5497 - val_acc: 0.3439
Epoch 3/5
113375/113375 [==============================] - 2s - loss: 0.3141 - acc: 0.5197 - val_loss: 0.5492 - val_acc: 0.3437
Epoch 4/5
113375/113375 [==============================] - 2s - loss: 0.3141 - acc: 0.5184 - val_loss: 0.5494 - val_acc: 0.3436
Epoch 5/5
113375/113375 [==============================] - 2s - loss: 0.3133 - acc: 0.5193 - val_loss: 0.5492 - val_acc: 0.3436
21920/22544 [============================>.] - ETA: 0s
 Train Accuracy: 0.28599777742025684, Test Accuracy: 0.3435947480482612
Training for no_of_features: 32, hidden_layer: 10
Train on 113375 samples, validate on 22544 samples
Epoch 1/5
113375/113375 [==============================] - 5s - loss: 0.2771 - acc: 0.5308 - val_loss: 0.4092 - val_acc: 0.5642
Epoch 2/5
113375/113375 [==============================] - 3s - loss: 0.2771 - acc: 0.5314 - val_loss: 0.4092 - val_acc: 0.5632
Epoch 3/5
113375/113375 [==============================] - 4s - loss: 0.2771 - acc: 0.5312 - val_loss: 0.4092 - val_acc: 0.5628
Epoch 4/5
113375/113375 [==============================] - 4s - loss: 0.2759 - acc: 0.5329 - val_loss: 0.4091 - val_acc: 0.5624
Epoch 5/5
113375/113375 [==============================] - 3s - loss: 0.2770 - acc: 0.5312 - val_loss: 0.4090 - val_acc: 0.5621
21312/22544 [===========================>..] - ETA: 0s
 Train Accuracy: 0.4657088426970966, Test Accuracy: 0.5621451383960255

In [7]:
pd.DataFrame(Train.scores).sort('test_score', ascending=False)


/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[7]:
epoch no_of_features hidden_layers train_score test_score
7 5 8 6 0.467138 0.568311
14 5 32 10 0.465709 0.562145
4 5 4 6 0.478647 0.546443
0 5 2 2 0.531910 0.430758
1 5 2 6 0.538816 0.430758
2 5 2 10 0.537863 0.430758
5 5 4 10 0.532783 0.430758
8 5 8 10 0.541197 0.430758
10 5 16 6 0.538736 0.430758
12 5 32 2 0.292586 0.406804
3 5 4 2 0.256390 0.369411
13 5 32 6 0.285998 0.343595
9 5 16 2 0.365375 0.299148
6 5 8 2 0.217733 0.277724
11 5 16 10 0.330529 0.237846

In [8]:
#for m in Train.models:
#    m.model.save("dataset/keras_model_epoch_{}_no_of_features_{}_hidden_layers_{}".format(m.epoch,
#                                                                                         m.no_of_features,
#                                                                                         m.hidden_layers))

#SVG(model_to_dot(Train.model).create(prog='dot', format='svg'))


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-8-e5082b1f9333> in <module>()
      4 #                                                                                         m.hidden_layers))
      5 
----> 6 SVG(model_to_dot(Train.model).create(prog='dot', format='svg'))

/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/keras/utils/vis_utils.py in model_to_dot(model, show_shapes, show_layer_names)
     33     from ..models import Sequential
     34 
---> 35     _check_pydot()
     36     dot = pydot.Dot()
     37     dot.set('rankdir', 'TB')

/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/keras/utils/vis_utils.py in _check_pydot()
     15 def _check_pydot():
     16     if not (pydot and pydot.find_graphviz()):
---> 17         raise ImportError('Failed to import pydot. You must install pydot'
     18                           ' and graphviz for `pydotprint` to work.')
     19 

ImportError: Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.

In [ ]:
pd.Panel(Train.predictions).to_pickle("dataset/keras_dense_nsl_kdd_predictions.pkl")
pd.DataFrame(Train.scores).to_pickle("dataset/keras_dense_nsl_kdd_scores.pkl")