Read Data Sample


In [1]:
import pandas as pd
import numpy as np
import os
from collections import namedtuple
pd.set_option("display.max_rows",100)
%matplotlib inline

In [2]:
%%bash
rm dataset/scores/tf_dense_only_nsl_kdd_scores_all-.pkl

In [3]:
class dataset:
    kdd_train_2labels = pd.read_pickle("dataset/kdd_train__2labels.pkl")
    kdd_test_2labels = pd.read_pickle("dataset/kdd_test_2labels.pkl")
    kdd_test__2labels = pd.read_pickle("dataset/kdd_test__2labels.pkl")

    kdd_train_5labels = pd.read_pickle("dataset/kdd_train_5labels.pkl")
    kdd_test_5labels = pd.read_pickle("dataset/kdd_test_5labels.pkl")

In [4]:
dataset.kdd_train_2labels.shape


Out[4]:
(25192, 124)

In [5]:
dataset.kdd_test_2labels.shape


Out[5]:
(22544, 124)

In [6]:
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

class preprocess:
    
    output_columns_2labels = ['is_Normal','is_Attack']
    
    x_input = dataset.kdd_train_2labels.drop(output_columns_2labels, axis = 1)
    y_output = dataset.kdd_train_2labels.loc[:,output_columns_2labels]

    x_test_input = dataset.kdd_test_2labels.drop(output_columns_2labels, axis = 1)
    y_test = dataset.kdd_test_2labels.loc[:,output_columns_2labels]
    
    x_test__input = dataset.kdd_test__2labels.drop(output_columns_2labels, axis = 1)
    y_test_ = dataset.kdd_test__2labels.loc[:,output_columns_2labels]

    ss = pp.StandardScaler()

    x_train = ss.fit_transform(x_input)
    x_test = ss.transform(x_test_input)
    x_test_ = ss.transform(x_test__input)

    y_train = y_output.values
    y_test = y_test.values
    y_test_ = y_test_.values

preprocess.x_train.shape


Out[6]:
(25192, 122)

In [7]:
import tensorflow as tf

In [8]:
class network(object):
    
    input_dim = 122
    classes = 2
    hidden_encoder_dim = 122
    hidden_layers = 1
    latent_dim = 18

    def __init__(self, classes, hidden_layers, num_of_features):
        self.classes = classes
        self.hidden_layers = hidden_layers
        self.latent_dim = num_of_features
            
    def build_layers(self):
        tf.reset_default_graph()
        #learning_rate = tf.Variable(initial_value=0.001)

        input_dim = self.input_dim
        classes = self.classes
        hidden_encoder_dim = self.hidden_encoder_dim
        hidden_layers = self.hidden_layers
        latent_dim = self.latent_dim
        
        with tf.variable_scope("Input"):
            self.x = tf.placeholder("float", shape=[None, input_dim])
            self.y_ = tf.placeholder("float", shape=[None, classes])
            self.keep_prob = tf.placeholder("float")
            self.lr = tf.placeholder("float")
        
        with tf.variable_scope("Layer_Encoder"):

            hidden_encoder = tf.layers.dense(self.x, hidden_encoder_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
            hidden_encoder = tf.nn.dropout(hidden_encoder, self.keep_prob)
            for h in range(hidden_layers - 1):
                hidden_encoder = tf.layers.dense(hidden_encoder, latent_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                hidden_encoder = tf.nn.dropout(hidden_encoder, self.keep_prob)
            
            #hidden_encoder = tf.layers.dense(self.x, latent_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
            #hidden_encoder = tf.nn.dropout(hidden_encoder, self.keep_prob)
            
        with tf.variable_scope("Layer_Dense_Softmax"):
            self.y = tf.layers.dense(hidden_encoder, classes, activation=tf.nn.softmax)
            
        with tf.variable_scope("Loss"):
            
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = self.y_, logits = self.y))

            #loss = tf.clip_by_value(loss, -1e-1, 1e-1)
            #loss = tf.where(tf.is_nan(loss), 1e-1, loss)
            #loss = tf.where(tf.equal(loss, -1e-1), tf.random_normal(loss.shape), loss)
            #loss = tf.where(tf.equal(loss, 1e-1), tf.random_normal(loss.shape), loss)
            
            self.regularized_loss = loss
            correct_prediction = tf.equal(tf.argmax(self.y_, 1), tf.argmax(self.y, 1))
            self.tf_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name = "Accuracy")

        with tf.variable_scope("Optimizer"):
            learning_rate=self.lr
            optimizer = tf.train.AdamOptimizer(learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(self.regularized_loss))
            gradients = [
                None if gradient is None else tf.clip_by_value(gradient, -1, 1)
                for gradient in gradients]
            self.train_op = optimizer.apply_gradients(zip(gradients, variables))
            #self.train_op = optimizer.minimize(self.regularized_loss)
            
        # add op for merging summary
        #self.summary_op = tf.summary.merge_all()
        self.pred = tf.argmax(self.y, axis = 1)
        self.actual = tf.argmax(self.y_, axis = 1)

        # add Saver ops
        self.saver = tf.train.Saver()

In [9]:
import collections
import time
import sklearn.metrics as me 

class Train:    
    
    result = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score', 'f1_score', 'test_score_20', 'f1_score_20', 'time_taken'])

    predictions = {}
    predictions_ = {}

    results = []
    
    best_acc = 0
    best_acc_global = 0
    
    def train(epochs, net, h,f, lrs):
        batch_iterations = 200
        train_loss = None
        Train.best_acc = 0
        os.makedirs("dataset/tf_dense_only_nsl_kdd-/hidden layers_{}_features count_{}".format(epochs,h,f),
                    exist_ok = True)
        with tf.Session() as sess:
            #summary_writer_train = tf.summary.FileWriter('./logs/kdd/VAE/training', graph=sess.graph)
            #summary_writer_valid = tf.summary.FileWriter('./logs/kdd/VAE/validation')

            sess.run(tf.global_variables_initializer())
            start_time = time.perf_counter()
            for c, lr in enumerate(lrs):
                for epoch in range(1, (epochs+1)):
                    x_train, x_valid, y_train, y_valid, = ms.train_test_split(preprocess.x_train, 
                                                                              preprocess.y_train, 
                                                                              test_size=0.1)
                    batch_indices = np.array_split(np.arange(x_train.shape[0]), 
                                               batch_iterations)

                    for i in batch_indices:

                        def train_batch():
                            nonlocal train_loss
                            _, train_loss = sess.run([net.train_op, 
                                                               net.regularized_loss, 
                                                               ], #net.summary_op
                                                              feed_dict={net.x: x_train[i,:], 
                                                                         net.y_: y_train[i,:], 
                                                                         net.keep_prob:0.5, net.lr:lr})

                        train_batch()
                        #summary_writer_train.add_summary(summary_str, epoch)
                        while((train_loss > 1e4 or np.isnan(train_loss)) and epoch > 1):
                            print("Step {} | Training Loss: {:.6f}".format(epoch, train_loss))
                            net.saver.restore(sess, 
                                              tf.train.latest_checkpoint('dataset/tf_dense_only_nsl_kdd-/hidden_layers_{}_features_count_{}'
                                                                         .format(epochs,h,f)))
                            train_batch()


                    valid_accuracy = sess.run(net.tf_accuracy, #net.summary_op 
                                                          feed_dict={net.x: x_valid, 
                                                                     net.y_: y_valid, 
                                                                     net.keep_prob:1, net.lr:lr})
                    #summary_writer_valid.add_summary(summary_str, epoch)


                    accuracy, pred_value, actual_value, y_pred = sess.run([net.tf_accuracy, 
                                                                   net.pred, 
                                                                   net.actual, net.y], 
                                                                  feed_dict={net.x: preprocess.x_test, 
                                                                             net.y_: preprocess.y_test, 
                                                                             net.keep_prob:1, net.lr:lr})
                    f1_score = me.f1_score(actual_value, pred_value)
                    accuracy_, pred_value_, actual_value_, y_pred_ = sess.run([net.tf_accuracy, 
                                                                   net.pred, 
                                                                   net.actual, net.y], 
                                                                  feed_dict={net.x: preprocess.x_test_, 
                                                                             net.y_: preprocess.y_test_, 
                                                                             net.keep_prob:1, net.lr:lr})
                    f1_score_ = me.f1_score(actual_value_, pred_value_)
                    
                    print("Step {} | Training Loss: {:.6f} | Validation Accuracy: {:.6f}".format(epoch, train_loss, valid_accuracy))
                    print("Accuracy on Test data: {}, {}".format(accuracy, accuracy_))

                    if accuracy > Train.best_acc_global:
                        Train.best_acc_global = accuracy
                        Train.pred_value = pred_value
                        Train.actual_value = actual_value
                        Train.pred_value_ = pred_value_
                        Train.actual_value_ = actual_value_
                        Train.best_parameters = "Hidden Layers:{}, Features Count:{}".format(h, f)

                    if accuracy > Train.best_acc:
                        Train.best_acc = accuracy

                        if not (np.isnan(train_loss)):
                            net.saver.save(sess, 
                                       "dataset/tf_dense_only_nsl_kdd-/hidden_layers_{}_features_count_{}".format(h,f),
                                        global_step = epochs)
                        curr_pred = pd.DataFrame({"Attack_prob":y_pred[:,-2], "Normal_prob":y_pred[:, -1], "Prediction":pred_value, "Actual":actual_value})
                        curr_pred_ = pd.DataFrame({"Attack_prob":y_pred_[:,-2], "Normal_prob":y_pred_[:, -1], "Prediction":pred_value_, "Actual": actual_value_})
                        
                        Train.predictions.update({"{}_{}_{}".format((epoch+1)*(c+1),f,h):(curr_pred, 
                                                   Train.result((epoch+1)*(c+1), f, h, valid_accuracy, accuracy, f1_score, accuracy_, f1_score_, time.perf_counter() - start_time))})
                        Train.predictions_.update({"{}_{}_{}".format((epoch+1)*(c+1),f,h):(curr_pred_, 
                                                   Train.result((epoch+1)*(c+1), f, h, valid_accuracy, accuracy, f1_score, accuracy_, f1_score_, time.perf_counter() - start_time))})

                        #Train.results.append(Train.result(epochs, f, h,valid_accuracy, accuracy))

In [10]:
import itertools

df_results = []
past_scores = []

class Hyperparameters:
#    features_arr = [2, 4, 8, 16, 32, 64, 128, 256]
#    hidden_layers_arr = [2, 4, 6, 10]

    def start_training():
        print("********************************** Training ******************************")

        global df_results
        global past_scores
        Train.predictions = {}
        Train.predictions_ = {}

        Train.results = []
    
        
        features_arr = [1, 12, 24, 48, 122]
        hidden_layers_arr = [1, 3]

        epochs = [5]
        lrs = [1e-5, 1e-6]
        print("********************************** Entering Loop ******************************")

        for e, h, f in itertools.product(epochs, hidden_layers_arr, features_arr):
            print("Current Layer Attributes - epochs:{} hidden layers:{} features count:{}".format(e,h,f))
            n = network(2,h,f)
            n.build_layers()
            Train.train(e, n, h,f, lrs)
            
        dict1 = {}
        dict1_ = {}
        dict2 = []

        for k, (v1, v2) in Train.predictions.items():
            dict1.update({k: v1})
            dict2.append(v2)

        for k, (v1_, v2) in Train.predictions_.items():
            dict1_.update({k: v1_})

        Train.predictions = dict1
        Train.predictions_ = dict1_
        
        Train.results = dict2
        df_results = pd.DataFrame(Train.results)

        #temp = df_results.set_index(['no_of_features', 'hidden_layers'])

        if not os.path.isfile('dataset/scores/tf_dense_only_nsl_kdd_scores_all-.pkl'):
            past_scores = df_results
        else:
            past_scores = pd.read_pickle("dataset/scores/tf_dense_only_nsl_kdd_scores_all-.pkl")
            past_scores = past_scores.append(df_results, ignore_index=True)
        past_scores.to_pickle("dataset/scores/tf_dense_only_nsl_kdd_scores_all-.pkl")

In [11]:
#%%timeit -r 10
#capture
Hyperparameters.start_training()


********************************** Training ******************************
********************************** Entering Loop ******************************
Current Layer Attributes - epochs:5 hidden layers:1 features count:1
Step 1 | Training Loss: 0.804066 | Validation Accuracy: 0.543254
Accuracy on Test data: 0.4374556541442871, 0.19578059017658234
Step 2 | Training Loss: 0.709505 | Validation Accuracy: 0.588889
Accuracy on Test data: 0.4477466344833374, 0.21392405033111572
Step 3 | Training Loss: 0.660634 | Validation Accuracy: 0.601190
Accuracy on Test data: 0.4582594037055969, 0.23265822231769562
Step 4 | Training Loss: 0.602664 | Validation Accuracy: 0.730952
Accuracy on Test data: 0.507762610912323, 0.24793249368667603
Step 5 | Training Loss: 0.588001 | Validation Accuracy: 0.759524
Accuracy on Test data: 0.5309173464775085, 0.2886075973510742
Step 1 | Training Loss: 0.599571 | Validation Accuracy: 0.757143
Accuracy on Test data: 0.5338005423545837, 0.29375526309013367
Step 2 | Training Loss: 0.573258 | Validation Accuracy: 0.767857
Accuracy on Test data: 0.5369499921798706, 0.299156129360199
Step 3 | Training Loss: 0.576401 | Validation Accuracy: 0.754365
Accuracy on Test data: 0.53974449634552, 0.30362868309020996
Step 4 | Training Loss: 0.601985 | Validation Accuracy: 0.775794
Accuracy on Test data: 0.5444464087486267, 0.31147679686546326
Step 5 | Training Loss: 0.593091 | Validation Accuracy: 0.776190
Accuracy on Test data: 0.5603264570236206, 0.3157805800437927
Current Layer Attributes - epochs:5 hidden layers:1 features count:12
Step 1 | Training Loss: 0.651385 | Validation Accuracy: 0.639286
Accuracy on Test data: 0.591288149356842, 0.39518988132476807
Step 2 | Training Loss: 0.626038 | Validation Accuracy: 0.763889
Accuracy on Test data: 0.6734829545021057, 0.4113923907279968
Step 3 | Training Loss: 0.621024 | Validation Accuracy: 0.815079
Accuracy on Test data: 0.6867902874946594, 0.42405062913894653
Step 4 | Training Loss: 0.604952 | Validation Accuracy: 0.845635
Accuracy on Test data: 0.7069730162620544, 0.46025317907333374
Step 5 | Training Loss: 0.533078 | Validation Accuracy: 0.860317
Accuracy on Test data: 0.7197480201721191, 0.48320674896240234
Step 1 | Training Loss: 0.539315 | Validation Accuracy: 0.868651
Accuracy on Test data: 0.7216110825538635, 0.4866666793823242
Step 2 | Training Loss: 0.540589 | Validation Accuracy: 0.865873
Accuracy on Test data: 0.7237402200698853, 0.4903797507286072
Step 3 | Training Loss: 0.527431 | Validation Accuracy: 0.878571
Accuracy on Test data: 0.7267565727233887, 0.4957805871963501
Step 4 | Training Loss: 0.531291 | Validation Accuracy: 0.888492
Accuracy on Test data: 0.7296398282051086, 0.5010126829147339
Step 5 | Training Loss: 0.525746 | Validation Accuracy: 0.888095
Accuracy on Test data: 0.7312366962432861, 0.503628671169281
Current Layer Attributes - epochs:5 hidden layers:1 features count:24
Step 1 | Training Loss: 0.767678 | Validation Accuracy: 0.288889
Accuracy on Test data: 0.43701207637786865, 0.6046413779258728
Step 2 | Training Loss: 0.683623 | Validation Accuracy: 0.555159
Accuracy on Test data: 0.6864797472953796, 0.6579746603965759
Step 3 | Training Loss: 0.642832 | Validation Accuracy: 0.778175
Accuracy on Test data: 0.8164921998977661, 0.6801687479019165
Step 4 | Training Loss: 0.579741 | Validation Accuracy: 0.846429
Accuracy on Test data: 0.8341465592384338, 0.696793258190155
Step 5 | Training Loss: 0.553790 | Validation Accuracy: 0.871825
Accuracy on Test data: 0.8423970937728882, 0.7072573900222778
Step 1 | Training Loss: 0.572720 | Validation Accuracy: 0.869841
Accuracy on Test data: 0.8425745368003845, 0.7075105309486389
Step 2 | Training Loss: 0.573432 | Validation Accuracy: 0.879762
Accuracy on Test data: 0.8437721729278564, 0.7093670964241028
Step 3 | Training Loss: 0.557021 | Validation Accuracy: 0.885317
Accuracy on Test data: 0.8442157506942749, 0.7102109789848328
Step 4 | Training Loss: 0.558985 | Validation Accuracy: 0.884127
Accuracy on Test data: 0.845014214515686, 0.7113924026489258
Step 5 | Training Loss: 0.557387 | Validation Accuracy: 0.890873
Accuracy on Test data: 0.8449698090553284, 0.7108860611915588
Current Layer Attributes - epochs:5 hidden layers:1 features count:48
Step 1 | Training Loss: 0.597744 | Validation Accuracy: 0.846825
Accuracy on Test data: 0.8561035990715027, 0.750379741191864
Step 2 | Training Loss: 0.547499 | Validation Accuracy: 0.864683
Accuracy on Test data: 0.8491394519805908, 0.7291139364242554
Step 3 | Training Loss: 0.538729 | Validation Accuracy: 0.898413
Accuracy on Test data: 0.8504258394241333, 0.7295358777046204
Step 4 | Training Loss: 0.485779 | Validation Accuracy: 0.907937
Accuracy on Test data: 0.8552164435386658, 0.7378059029579163
Step 5 | Training Loss: 0.482663 | Validation Accuracy: 0.916667
Accuracy on Test data: 0.857123851776123, 0.7406750917434692
Step 1 | Training Loss: 0.466812 | Validation Accuracy: 0.911905
Accuracy on Test data: 0.857123851776123, 0.7406750917434692
Step 2 | Training Loss: 0.495983 | Validation Accuracy: 0.912302
Accuracy on Test data: 0.8573456406593323, 0.7408438920974731
Step 3 | Training Loss: 0.492276 | Validation Accuracy: 0.915476
Accuracy on Test data: 0.8575230836868286, 0.7411814332008362
Step 4 | Training Loss: 0.505915 | Validation Accuracy: 0.914286
Accuracy on Test data: 0.8578779101371765, 0.7418565154075623
Step 5 | Training Loss: 0.484171 | Validation Accuracy: 0.917063
Accuracy on Test data: 0.8577448725700378, 0.7415189743041992
Current Layer Attributes - epochs:5 hidden layers:1 features count:122
Step 1 | Training Loss: 0.752569 | Validation Accuracy: 0.426984
Accuracy on Test data: 0.4384758770465851, 0.4375527501106262
Step 2 | Training Loss: 0.665333 | Validation Accuracy: 0.583730
Accuracy on Test data: 0.5985628366470337, 0.4886919856071472
Step 3 | Training Loss: 0.620566 | Validation Accuracy: 0.662302
Accuracy on Test data: 0.6890525221824646, 0.5228691697120667
Step 4 | Training Loss: 0.589624 | Validation Accuracy: 0.823810
Accuracy on Test data: 0.7759048938751221, 0.5993248820304871
Step 5 | Training Loss: 0.557219 | Validation Accuracy: 0.869841
Accuracy on Test data: 0.7939584851264954, 0.6238818764686584
Step 1 | Training Loss: 0.594878 | Validation Accuracy: 0.874206
Accuracy on Test data: 0.7950674295425415, 0.6243038177490234
Step 2 | Training Loss: 0.531732 | Validation Accuracy: 0.882143
Accuracy on Test data: 0.796664297580719, 0.6259071826934814
Step 3 | Training Loss: 0.560829 | Validation Accuracy: 0.878175
Accuracy on Test data: 0.7975957989692688, 0.6254852414131165
Step 4 | Training Loss: 0.570878 | Validation Accuracy: 0.892063
Accuracy on Test data: 0.798660397529602, 0.6264978647232056
Step 5 | Training Loss: 0.561398 | Validation Accuracy: 0.884921
Accuracy on Test data: 0.7987934947013855, 0.6267510652542114
Current Layer Attributes - epochs:5 hidden layers:3 features count:1
/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
Step 1 | Training Loss: 0.693112 | Validation Accuracy: 0.551190
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 2 | Training Loss: 0.693037 | Validation Accuracy: 0.546429
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 3 | Training Loss: 0.693286 | Validation Accuracy: 0.543254
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 4 | Training Loss: 0.693048 | Validation Accuracy: 0.530159
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 5 | Training Loss: 0.693126 | Validation Accuracy: 0.535714
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 1 | Training Loss: 0.692970 | Validation Accuracy: 0.531746
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 2 | Training Loss: 0.692914 | Validation Accuracy: 0.547619
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 3 | Training Loss: 0.692909 | Validation Accuracy: 0.523413
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 4 | Training Loss: 0.693070 | Validation Accuracy: 0.511905
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Step 5 | Training Loss: 0.693404 | Validation Accuracy: 0.534921
Accuracy on Test data: 0.43075764179229736, 0.18160337209701538
Current Layer Attributes - epochs:5 hidden layers:3 features count:12
Step 1 | Training Loss: 0.691103 | Validation Accuracy: 0.700397
Accuracy on Test data: 0.6388839483261108, 0.425907164812088
Step 2 | Training Loss: 0.655879 | Validation Accuracy: 0.782143
Accuracy on Test data: 0.6881210207939148, 0.43932488560676575
Step 3 | Training Loss: 0.695034 | Validation Accuracy: 0.816667
Accuracy on Test data: 0.7005411386489868, 0.45358648896217346
Step 4 | Training Loss: 0.677474 | Validation Accuracy: 0.846429
Accuracy on Test data: 0.7159332633018494, 0.47915610671043396
Step 5 | Training Loss: 0.651454 | Validation Accuracy: 0.848413
Accuracy on Test data: 0.7221433520317078, 0.48877638578414917
Step 1 | Training Loss: 0.633732 | Validation Accuracy: 0.846825
Accuracy on Test data: 0.7227200269699097, 0.4897046387195587
Step 2 | Training Loss: 0.678854 | Validation Accuracy: 0.860317
Accuracy on Test data: 0.7232966423034668, 0.4907172918319702
Step 3 | Training Loss: 0.624039 | Validation Accuracy: 0.858333
Accuracy on Test data: 0.7239620089530945, 0.49181434512138367
Step 4 | Training Loss: 0.651784 | Validation Accuracy: 0.854365
Accuracy on Test data: 0.7269783616065979, 0.4973839521408081
Step 5 | Training Loss: 0.652090 | Validation Accuracy: 0.857540
Accuracy on Test data: 0.7295954823493958, 0.502109706401825
Current Layer Attributes - epochs:5 hidden layers:3 features count:24
Step 1 | Training Loss: 0.730834 | Validation Accuracy: 0.349603
Accuracy on Test data: 0.20045244693756104, 0.2586497962474823
Step 2 | Training Loss: 0.712260 | Validation Accuracy: 0.574603
Accuracy on Test data: 0.44016146659851074, 0.24329113960266113
Step 3 | Training Loss: 0.688727 | Validation Accuracy: 0.670635
Accuracy on Test data: 0.5702182650566101, 0.2827848196029663
Step 4 | Training Loss: 0.637173 | Validation Accuracy: 0.773413
Accuracy on Test data: 0.6251330971717834, 0.3086076080799103
Step 5 | Training Loss: 0.679531 | Validation Accuracy: 0.807936
Accuracy on Test data: 0.6705996990203857, 0.3924894630908966
Step 1 | Training Loss: 0.655700 | Validation Accuracy: 0.816667
Accuracy on Test data: 0.6737490892410278, 0.39822784066200256
Step 2 | Training Loss: 0.658028 | Validation Accuracy: 0.815873
Accuracy on Test data: 0.6775638461112976, 0.4049789011478424
Step 3 | Training Loss: 0.640675 | Validation Accuracy: 0.821825
Accuracy on Test data: 0.680890679359436, 0.4112236201763153
Step 4 | Training Loss: 0.691339 | Validation Accuracy: 0.815079
Accuracy on Test data: 0.6831085681915283, 0.4152742624282837
Step 5 | Training Loss: 0.669821 | Validation Accuracy: 0.817460
Accuracy on Test data: 0.6852821111679077, 0.41940927505493164
Current Layer Attributes - epochs:5 hidden layers:3 features count:48
Step 1 | Training Loss: 0.719790 | Validation Accuracy: 0.662302
Accuracy on Test data: 0.49795955419540405, 0.2288607656955719
Step 2 | Training Loss: 0.712641 | Validation Accuracy: 0.691270
Accuracy on Test data: 0.5543381571769714, 0.2574683427810669
Step 3 | Training Loss: 0.670495 | Validation Accuracy: 0.776190
Accuracy on Test data: 0.6291696429252625, 0.3164556920528412
Step 4 | Training Loss: 0.641001 | Validation Accuracy: 0.830159
Accuracy on Test data: 0.6670510768890381, 0.3821941018104553
Step 5 | Training Loss: 0.634173 | Validation Accuracy: 0.869444
Accuracy on Test data: 0.6928229331970215, 0.4279325008392334
Step 1 | Training Loss: 0.625137 | Validation Accuracy: 0.874603
Accuracy on Test data: 0.6949964761734009, 0.4319831132888794
Step 2 | Training Loss: 0.603623 | Validation Accuracy: 0.878571
Accuracy on Test data: 0.6970812678337097, 0.4356118142604828
Step 3 | Training Loss: 0.558584 | Validation Accuracy: 0.884127
Accuracy on Test data: 0.7000532150268555, 0.44092828035354614
Step 4 | Training Loss: 0.620057 | Validation Accuracy: 0.879762
Accuracy on Test data: 0.7027590274810791, 0.445991575717926
Step 5 | Training Loss: 0.575750 | Validation Accuracy: 0.887698
Accuracy on Test data: 0.7040454149246216, 0.44835442304611206
Current Layer Attributes - epochs:5 hidden layers:3 features count:122
Step 1 | Training Loss: 0.668684 | Validation Accuracy: 0.768254
Accuracy on Test data: 0.7576738595962524, 0.5873417854309082
Step 2 | Training Loss: 0.641573 | Validation Accuracy: 0.809921
Accuracy on Test data: 0.7869943380355835, 0.6220253109931946
Step 3 | Training Loss: 0.642092 | Validation Accuracy: 0.875397
Accuracy on Test data: 0.8244765996932983, 0.6774683594703674
Step 4 | Training Loss: 0.625123 | Validation Accuracy: 0.874603
Accuracy on Test data: 0.8274485468864441, 0.6827847957611084
Step 5 | Training Loss: 0.566429 | Validation Accuracy: 0.894841
Accuracy on Test data: 0.8262509107589722, 0.6793248653411865
Step 1 | Training Loss: 0.622355 | Validation Accuracy: 0.895635
Accuracy on Test data: 0.8262065052986145, 0.6790717244148254
Step 2 | Training Loss: 0.568567 | Validation Accuracy: 0.894048
Accuracy on Test data: 0.8259403705596924, 0.6783966422080994
Step 3 | Training Loss: 0.596171 | Validation Accuracy: 0.900794
Accuracy on Test data: 0.8257185816764832, 0.6779747009277344
Step 4 | Training Loss: 0.595517 | Validation Accuracy: 0.892460
Accuracy on Test data: 0.825762927532196, 0.6778903007507324
Step 5 | Training Loss: 0.553253 | Validation Accuracy: 0.899206
Accuracy on Test data: 0.8259847164154053, 0.6780591011047363

In [ ]:


In [12]:
#g = df_results.groupby(by=['no_of_features'])
#idx = g['test_score'].transform(max) == df_results['test_score']
#df_results[idx].sort_values(by = 'test_score', ascending = False)

In [13]:
#g = df_results.groupby(by=['no_of_features'])
#idx = g['test_score_20'].transform(max) == df_results['test_score_20']
#df_results[idx].sort_values(by = 'test_score_20', ascending = False)

In [14]:
#df_results.sort_values(by = 'test_score', ascending = False)

In [15]:
#Train.predictions_

In [16]:
pd.Panel(Train.predictions).to_pickle("dataset/tf_dense_only_nsl_kdd_predictions-.pkl")
pd.Panel(Train.predictions_).to_pickle("dataset/tf_dense_only_nsl_kdd_predictions-__.pkl")

df_results.to_pickle("dataset/tf_dense_only_nsl_kdd_scores-.pkl")

In [ ]:


In [17]:
import numpy as np
import matplotlib.pyplot as plt
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    np.set_printoptions(precision=4)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        #print("Normalized confusion matrix")
    else:
        #print('Confusion matrix, without normalization')
        pass
    
    #print(cm)

    label = [["\n True Negative", "\n False Positive \n Type II Error"],
             ["\n False Negative \n Type I Error", "\n True Positive"]
            ]
    
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        
        plt.text(j, i, "{} {}".format(cm[i, j].round(4), label[i][j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def plot(actual_value, pred_value):
    from sklearn.metrics import confusion_matrix

    cm_2labels = confusion_matrix(y_pred = pred_value, y_true = actual_value)
    plt.figure(figsize=[6,6])
    plot_confusion_matrix(cm_2labels, ['Normal', 'Attack'], normalize = False)

In [18]:
#plot(actual_value = Train.actual_value, pred_value = Train.pred_value)

In [19]:
#plot(actual_value = Train.actual_value_, pred_value = Train.pred_value_)

In [20]:
past_scores = pd.read_pickle("dataset/scores/tf_dense_only_nsl_kdd_scores_all-.pkl")

In [21]:
past_scores.sort_values(by='f1_score',ascending=False)


Out[21]:
epoch no_of_features hidden_layers train_score test_score f1_score test_score_20 f1_score_20 time_taken
23 2 48 1 0.846825 0.856104 0.869488 0.750380 0.839570 0.642131
26 10 48 1 0.914286 0.857878 0.867734 0.741857 0.828982 4.327987
25 8 48 1 0.915476 0.857523 0.867393 0.741181 0.828516 3.768181
24 6 48 1 0.912302 0.857346 0.867195 0.740844 0.828234 3.158489
22 10 24 1 0.884127 0.845014 0.847968 0.711392 0.795185 5.346153
21 8 24 1 0.885317 0.844216 0.847078 0.710211 0.794199 4.741357
20 6 24 1 0.879762 0.843772 0.846576 0.709367 0.793476 4.194588
18 4 24 1 0.869841 0.842575 0.845299 0.707511 0.791957 3.586408
19 5 24 1 0.846429 0.834147 0.836632 0.696793 0.783385 2.446598
63 5 122 3 0.874603 0.827449 0.836445 0.682785 0.784028 3.972664
62 4 122 3 0.875397 0.824477 0.834484 0.677468 0.781899 2.972374
61 3 122 3 0.809921 0.786994 0.801472 0.622025 0.745786 1.957672
34 12 122 1 0.884921 0.798793 0.788984 0.626751 0.710366 5.995009
33 10 122 1 0.892063 0.798660 0.788795 0.626498 0.710075 5.441039
32 8 122 1 0.878175 0.797596 0.787659 0.625485 0.709289 4.856589
31 6 122 1 0.882143 0.796664 0.786592 0.625907 0.709749 4.193246
29 4 122 1 0.874206 0.795067 0.784696 0.624304 0.708219 3.609194
60 2 122 3 0.768254 0.757674 0.771661 0.587342 0.714702 0.950984
30 5 122 1 0.823810 0.775905 0.761900 0.599325 0.683129 2.403930
43 12 12 3 0.857540 0.729595 0.704365 0.502110 0.587355 8.040761
15 12 12 1 0.888095 0.731237 0.701277 0.503629 0.579437 5.775745
42 10 12 3 0.854365 0.726978 0.700676 0.497384 0.581800 7.221911
17 3 24 1 0.555159 0.686480 0.699566 0.657975 0.751456 1.181485
14 10 12 1 0.888492 0.729640 0.698879 0.501013 0.576099 5.240943
41 8 12 3 0.858333 0.723962 0.696454 0.491814 0.575317 6.397934
40 6 12 3 0.860317 0.723297 0.695589 0.490717 0.574130 5.594408
38 4 12 3 0.846825 0.722720 0.694880 0.489705 0.573163 4.851391
13 8 12 1 0.878571 0.726757 0.694626 0.495781 0.569556 4.638695
12 6 12 1 0.865873 0.723740 0.690088 0.490380 0.562677 4.093649
10 4 12 1 0.868651 0.721611 0.686951 0.486667 0.558082 3.485513
39 5 12 3 0.846429 0.715933 0.685709 0.479156 0.560899 3.209052
11 5 12 1 0.845635 0.706973 0.664397 0.460253 0.523256 2.335920
59 12 48 3 0.887698 0.704045 0.655265 0.448354 0.504960 8.769443
58 10 48 3 0.879762 0.702759 0.653032 0.445992 0.501329 7.874758
37 3 12 3 0.782143 0.688121 0.649798 0.439325 0.514824 1.595154
57 8 48 3 0.884127 0.700053 0.648727 0.440928 0.494391 6.982720
56 6 48 3 0.878571 0.697081 0.643859 0.435612 0.486802 6.084052
54 4 48 3 0.874603 0.694996 0.640414 0.431983 0.481633 5.269588
51 12 24 3 0.817460 0.685282 0.622265 0.419409 0.456298 8.399888
9 3 12 1 0.763889 0.673483 0.620547 0.411392 0.465476 1.176470
50 10 24 3 0.815079 0.683109 0.618417 0.415274 0.449774 7.532192
49 8 24 3 0.821825 0.680891 0.614717 0.411224 0.443842 6.746385
48 6 24 3 0.815873 0.677564 0.609088 0.404979 0.434518 5.869643
46 4 24 3 0.816667 0.673749 0.602497 0.398228 0.424130 4.998380
55 5 48 3 0.830159 0.667051 0.593083 0.382194 0.407254 3.501897
36 2 12 3 0.700397 0.638884 0.575658 0.425907 0.498858 0.850992
16 2 24 1 0.288889 0.437012 0.548552 0.604641 0.708263 0.624594
8 2 12 1 0.639286 0.591288 0.541729 0.395190 0.451855 0.631919
47 5 24 3 0.773413 0.625133 0.513612 0.308608 0.275020 3.390746
28 3 122 1 0.583730 0.598563 0.505140 0.488692 0.570192 1.208710
7 12 1 1 0.776190 0.560326 0.377152 0.315781 0.288022 5.756575
27 2 122 1 0.426984 0.438476 0.367208 0.437553 0.514955 0.576209
53 3 48 3 0.691270 0.554338 0.363913 0.257468 0.182629 1.817415
6 10 1 1 0.775794 0.544446 0.339295 0.311477 0.279686 5.161612
5 8 1 1 0.754365 0.539744 0.327631 0.303629 0.265052 4.625136
4 6 1 1 0.767857 0.536950 0.320510 0.299156 0.256424 4.075766
2 4 1 1 0.757143 0.533801 0.312533 0.293755 0.246104 3.478816
3 5 1 1 0.730952 0.507763 0.241335 0.247932 0.150753 2.334681
52 2 48 3 0.662302 0.497960 0.212825 0.228861 0.111435 0.927484
45 3 24 3 0.574603 0.440161 0.121651 0.243291 0.151093 1.698722
44 2 24 3 0.349603 0.200452 0.121375 0.258650 0.218486 0.865629
1 3 1 1 0.588889 0.447747 0.061086 0.213924 0.076168 1.196932
0 2 1 1 0.543254 0.437456 0.026857 0.195781 0.034448 0.650613
35 2 1 3 0.551190 0.430758 0.000000 0.181603 0.000000 0.857066

In [22]:
psg = past_scores.sort_values(by='test_score', ascending=False).groupby(by=['no_of_features', 'hidden_layers'])
psg.first().sort_values(by='test_score', ascending=False)


Out[22]:
epoch train_score test_score f1_score test_score_20 f1_score_20 time_taken
no_of_features hidden_layers
48 1 10 0.914286 0.857878 0.867734 0.741857 0.828982 4.327987
24 1 10 0.884127 0.845014 0.847968 0.711392 0.795185 5.346153
122 3 5 0.874603 0.827449 0.836445 0.682785 0.784028 3.972664
1 12 0.884921 0.798793 0.788984 0.626751 0.710366 5.995009
12 1 12 0.888095 0.731237 0.701277 0.503629 0.579437 5.775745
3 12 0.857540 0.729595 0.704365 0.502110 0.587355 8.040761
48 3 12 0.887698 0.704045 0.655265 0.448354 0.504960 8.769443
24 3 12 0.817460 0.685282 0.622265 0.419409 0.456298 8.399888
1 1 12 0.776190 0.560326 0.377152 0.315781 0.288022 5.756575
3 2 0.551190 0.430758 0.000000 0.181603 0.000000 0.857066

In [23]:
psg.mean().sort_values(by='test_score', ascending=False)


Out[23]:
epoch train_score test_score f1_score test_score_20 f1_score_20 time_taken
no_of_features hidden_layers
48 1 6.500000 0.897222 0.857213 0.867953 0.743565 0.831326 2.974197
122 3 3.500000 0.832044 0.799148 0.811015 0.642405 0.756604 2.463423
24 1 5.428571 0.744218 0.761888 0.781667 0.685413 0.773989 3.160169
122 1 6.250000 0.780754 0.724966 0.696372 0.581814 0.664497 3.535491
12 3 6.250000 0.825794 0.708686 0.675391 0.477015 0.558293 4.720200
1 6.250000 0.829812 0.700591 0.662312 0.468038 0.535805 3.422357
48 3 6.250000 0.823562 0.652286 0.551390 0.383924 0.396304 5.153420
24 3 6.250000 0.723065 0.583293 0.477953 0.357458 0.356645 4.937698
1 1 6.250000 0.711806 0.513529 0.250800 0.272679 0.199582 3.410016
3 2.000000 0.551190 0.430758 0.000000 0.181603 0.000000 0.857066

In [24]:
Train.predictions = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_predictions-.pkl")
Train.predictions_ = pd.read_pickle("dataset/tf_dense_only_nsl_kdd_predictions-__.pkl")

In [25]:
#epoch_nof_hidden
Train.predictions["12_12_3"]


Out[25]:
Actual Attack_prob Normal_prob Prediction
0 1.0 0.315425 0.684575 1.0
1 1.0 0.313895 0.686105 1.0
2 0.0 0.518888 0.481112 0.0
3 1.0 0.492554 0.507446 1.0
4 1.0 0.407654 0.592345 1.0
5 0.0 0.766832 0.233168 0.0
6 0.0 0.624008 0.375992 0.0
7 1.0 0.676774 0.323226 0.0
8 0.0 0.746453 0.253547 0.0
9 1.0 0.533616 0.466384 0.0
10 1.0 0.519037 0.480963 0.0
11 0.0 0.661695 0.338305 0.0
12 1.0 0.315470 0.684530 1.0
13 1.0 0.415229 0.584771 1.0
14 0.0 0.556834 0.443166 0.0
15 0.0 0.770972 0.229028 0.0
16 0.0 0.736537 0.263463 0.0
17 0.0 0.770437 0.229563 0.0
18 0.0 0.583279 0.416721 0.0
19 1.0 0.319527 0.680473 1.0
20 1.0 0.386174 0.613827 1.0
21 1.0 0.598108 0.401892 0.0
22 0.0 0.738327 0.261673 0.0
23 0.0 0.772876 0.227124 0.0
24 1.0 0.277226 0.722774 1.0
25 1.0 0.535714 0.464286 0.0
26 0.0 0.741612 0.258388 0.0
27 0.0 0.740315 0.259685 0.0
28 1.0 0.569087 0.430913 0.0
29 0.0 0.578221 0.421779 0.0
30 1.0 0.536508 0.463492 0.0
31 0.0 0.681668 0.318332 0.0
32 0.0 0.775476 0.224524 0.0
33 0.0 0.470893 0.529107 1.0
34 1.0 0.280704 0.719296 1.0
35 1.0 0.247434 0.752566 1.0
36 0.0 0.739893 0.260107 0.0
37 1.0 0.537207 0.462793 0.0
38 0.0 0.573186 0.426814 0.0
39 0.0 0.492756 0.507244 1.0
40 1.0 0.334219 0.665781 1.0
41 0.0 0.777075 0.222925 0.0
42 0.0 0.763563 0.236437 0.0
43 0.0 0.726119 0.273881 0.0
44 1.0 0.592471 0.407529 0.0
45 0.0 0.730619 0.269381 0.0
46 1.0 0.517486 0.482514 0.0
47 1.0 0.741790 0.258210 0.0
48 1.0 0.319903 0.680097 1.0
49 0.0 0.545315 0.454685 0.0
... ... ... ... ...
22494 1.0 0.500594 0.499406 0.0
22495 0.0 0.782375 0.217624 0.0
22496 1.0 0.421413 0.578587 1.0
22497 1.0 0.315251 0.684749 1.0
22498 1.0 0.316045 0.683955 1.0
22499 0.0 0.722879 0.277121 0.0
22500 1.0 0.319497 0.680503 1.0
22501 1.0 0.791304 0.208696 0.0
22502 1.0 0.546509 0.453491 0.0
22503 1.0 0.359054 0.640946 1.0
22504 1.0 0.275268 0.724732 1.0
22505 1.0 0.468573 0.531427 1.0
22506 0.0 0.584382 0.415618 0.0
22507 0.0 0.579053 0.420947 0.0
22508 0.0 0.701545 0.298455 0.0
22509 1.0 0.454361 0.545639 1.0
22510 1.0 0.485994 0.514006 1.0
22511 0.0 0.721500 0.278500 0.0
22512 1.0 0.528028 0.471972 0.0
22513 1.0 0.603016 0.396984 0.0
22514 0.0 0.520138 0.479862 0.0
22515 1.0 0.508554 0.491446 0.0
22516 0.0 0.758068 0.241932 0.0
22517 1.0 0.540403 0.459597 0.0
22518 0.0 0.713313 0.286687 0.0
22519 1.0 0.487434 0.512566 1.0
22520 1.0 0.341756 0.658244 1.0
22521 1.0 0.458776 0.541224 1.0
22522 1.0 0.739913 0.260087 0.0
22523 0.0 0.755810 0.244190 0.0
22524 1.0 0.587982 0.412018 0.0
22525 1.0 0.438880 0.561120 1.0
22526 0.0 0.768690 0.231310 0.0
22527 0.0 0.788431 0.211569 0.0
22528 1.0 0.481061 0.518939 1.0
22529 0.0 0.557205 0.442795 0.0
22530 1.0 0.322497 0.677503 1.0
22531 1.0 0.361536 0.638464 1.0
22532 0.0 0.741776 0.258224 0.0
22533 0.0 0.771269 0.228731 0.0
22534 1.0 0.315719 0.684281 1.0
22535 0.0 0.786737 0.213263 0.0
22536 1.0 0.482062 0.517938 1.0
22537 1.0 0.617349 0.382651 0.0
22538 1.0 0.479014 0.520986 1.0
22539 0.0 0.646830 0.353170 0.0
22540 0.0 0.745547 0.254453 0.0
22541 1.0 0.736529 0.263471 0.0
22542 0.0 0.566808 0.433192 0.0
22543 1.0 0.343301 0.656699 1.0

22544 rows × 4 columns


In [26]:
Train.predictions_["12_12_3"]


Out[26]:
Actual Attack_prob Normal_prob Prediction
0 1.0 0.543920 0.456081 0.0
1 1.0 0.526548 0.473452 0.0
2 1.0 0.496500 0.503500 1.0
3 0.0 0.579483 0.420517 0.0
4 1.0 0.319396 0.680604 1.0
5 1.0 0.705106 0.294894 0.0
6 1.0 0.197644 0.802356 1.0
7 1.0 0.515859 0.484141 0.0
8 0.0 0.575331 0.424669 0.0
9 0.0 0.472066 0.527934 1.0
10 1.0 0.358096 0.641904 1.0
11 1.0 0.362601 0.637399 1.0
12 1.0 0.482501 0.517499 1.0
13 1.0 0.542294 0.457706 0.0
14 1.0 0.499134 0.500866 1.0
15 1.0 0.206643 0.793357 1.0
16 1.0 0.493367 0.506633 1.0
17 1.0 0.346482 0.653518 1.0
18 1.0 0.507655 0.492345 0.0
19 1.0 0.448269 0.551731 1.0
20 1.0 0.506078 0.493922 0.0
21 1.0 0.539517 0.460483 0.0
22 1.0 0.445136 0.554864 1.0
23 1.0 0.804998 0.195002 0.0
24 1.0 0.505626 0.494374 0.0
25 1.0 0.541719 0.458281 0.0
26 1.0 0.741133 0.258867 0.0
27 1.0 0.596873 0.403127 0.0
28 0.0 0.579421 0.420579 0.0
29 1.0 0.050611 0.949389 1.0
30 1.0 0.474777 0.525223 1.0
31 1.0 0.799844 0.200156 0.0
32 0.0 0.578895 0.421105 0.0
33 0.0 0.521038 0.478962 0.0
34 1.0 0.547626 0.452374 0.0
35 0.0 0.461183 0.538817 1.0
36 1.0 0.436298 0.563702 1.0
37 1.0 0.513554 0.486447 0.0
38 1.0 0.396584 0.603416 1.0
39 1.0 0.227210 0.772790 1.0
40 0.0 0.657107 0.342893 0.0
41 1.0 0.723432 0.276568 0.0
42 1.0 0.553322 0.446678 0.0
43 0.0 0.591006 0.408994 0.0
44 1.0 0.296897 0.703103 1.0
45 1.0 0.637234 0.362765 0.0
46 1.0 0.355710 0.644290 1.0
47 1.0 0.542141 0.457859 0.0
48 1.0 0.148797 0.851203 1.0
49 1.0 0.364512 0.635488 1.0
... ... ... ... ...
11800 1.0 0.616209 0.383791 0.0
11801 1.0 0.285195 0.714805 1.0
11802 0.0 0.567614 0.432386 0.0
11803 1.0 0.806096 0.193904 0.0
11804 1.0 0.793400 0.206600 0.0
11805 1.0 0.497081 0.502919 1.0
11806 0.0 0.582638 0.417362 0.0
11807 1.0 0.818284 0.181716 0.0
11808 1.0 0.228338 0.771662 1.0
11809 1.0 0.402391 0.597609 1.0
11810 1.0 0.694455 0.305545 0.0
11811 1.0 0.462588 0.537412 1.0
11812 0.0 0.563739 0.436261 0.0
11813 1.0 0.680450 0.319550 0.0
11814 1.0 0.852943 0.147057 0.0
11815 1.0 0.513456 0.486544 0.0
11816 1.0 0.803023 0.196977 0.0
11817 0.0 0.582011 0.417989 0.0
11818 0.0 0.613968 0.386032 0.0
11819 1.0 0.743872 0.256128 0.0
11820 1.0 0.542108 0.457892 0.0
11821 1.0 0.613308 0.386691 0.0
11822 1.0 0.598801 0.401199 0.0
11823 1.0 0.366480 0.633520 1.0
11824 1.0 0.569719 0.430281 0.0
11825 0.0 0.718438 0.281562 0.0
11826 1.0 0.638256 0.361744 0.0
11827 1.0 0.306060 0.693940 1.0
11828 1.0 0.785302 0.214698 0.0
11829 1.0 0.583305 0.416695 0.0
11830 1.0 0.575192 0.424808 0.0
11831 1.0 0.492085 0.507915 1.0
11832 1.0 0.149318 0.850682 1.0
11833 0.0 0.495344 0.504656 1.0
11834 1.0 0.491428 0.508572 1.0
11835 0.0 0.582543 0.417457 0.0
11836 1.0 0.597265 0.402735 0.0
11837 1.0 0.621068 0.378932 0.0
11838 1.0 0.386631 0.613369 1.0
11839 1.0 0.404297 0.595703 1.0
11840 0.0 0.499817 0.500183 1.0
11841 0.0 0.590678 0.409322 0.0
11842 1.0 0.547577 0.452423 0.0
11843 1.0 0.359032 0.640968 1.0
11844 1.0 0.803895 0.196105 0.0
11845 0.0 0.584702 0.415298 0.0
11846 0.0 0.720294 0.279706 0.0
11847 1.0 0.408420 0.591580 1.0
11848 1.0 0.342619 0.657381 1.0
11849 1.0 0.542108 0.457892 0.0

11850 rows × 4 columns


In [27]:
df = Train.predictions["12_12_3"].dropna()
df_ = Train.predictions_["12_12_3"].dropna()

In [28]:
from sklearn import metrics as me
def get_score(y_true, y_pred):
    f1 = me.f1_score(y_true, y_pred)
    pre = me.precision_score(y_true, y_pred)
    rec = me.recall_score(y_true, y_pred)
    acc = me.accuracy_score(y_true, y_pred)
    return {"F1 Score":f1, "Precision":pre, "Recall":rec, "Accuracy":acc}

In [29]:
from sklearn import metrics as me

scores = get_score(df.loc[:,'Actual'].values.astype(int),
                df.loc[:,'Prediction'].values.astype(int))
scores.update({"Scenario":"Train+/Test+"})
score_df = pd.DataFrame(scores, index=[0])

scores = get_score(df_.loc[:,'Actual'].values.astype(int),
                df_.loc[:,'Prediction'].values.astype(int))
scores.update({"Scenario":"Train+/Test-"})

score_df = score_df.append(pd.DataFrame(scores, index=[1]))

score_df


Out[29]:
Accuracy F1 Score Precision Recall Scenario
0 0.729595 0.704365 0.932580 0.565885 Train+/Test+
1 0.502110 0.587355 0.912826 0.432976 Train+/Test-

In [30]:
df.groupby(by="Actual").Actual.count()


Out[30]:
Actual
0.0     9711
1.0    12833
Name: Actual, dtype: int64

In [31]:
plot(actual_value = df.loc[:,'Actual'].values.astype(int),
     pred_value = df.loc[:,'Prediction'].values.astype(int))



In [32]:
df_.groupby(by="Actual").Actual.count()


Out[32]:
Actual
0.0    2152
1.0    9698
Name: Actual, dtype: int64

In [33]:
plot(actual_value = df_.loc[:,'Actual'].values.astype(int),
     pred_value = df_.loc[:,'Prediction'].values.astype(int))



In [34]:
from scipy import stats

def fn(x):
    #print(x)
    return stats.norm.interval(0.95, loc=x.f1_score.mean(), scale=x.f1_score.std())
psg.apply(fn)


Out[34]:
no_of_features  hidden_layers
1               1                (-0.0106932141878, 0.512292833118)
                3                                        (nan, nan)
12              1                  (0.553520322795, 0.771103258353)
                3                  (0.589579017435, 0.761202912378)
24              1                   (0.553882891437, 1.00945197776)
                3                 (0.0413605111368, 0.914544769148)
48              1                   (0.86589960535, 0.870006020745)
                3                  (0.221270489485, 0.881508919623)
122             1                   (0.372951841349, 1.01979176516)
                3                  (0.750740841232, 0.871289928235)
dtype: object

In [ ]: