In [1]:
import pandas as pd
import numpy as np
# Cap DataFrame rendering so large frames don't flood the notebook output.
pd.set_option("display.max_rows",30)
# Render matplotlib figures inline in the notebook.
%matplotlib inline
In [2]:
class dataset:
    """Namespace holding the pre-pickled NSL-KDD "20 percent" train/test splits.

    Attributes are loaded once at class-definition time and read by the
    preprocessing / encoding cells below.
    """

    # NOTE(review): pd.read_pickle can execute arbitrary code from the file —
    # only load these pickles from a trusted source.
    kdd_train_2labels = pd.read_pickle("dataset/kdd_train_2labels_20percent.pkl")
    kdd_train_2labels_y = pd.read_pickle("dataset/kdd_train_2labels_y_20percent.pkl")
    kdd_test_2labels = pd.read_pickle("dataset/kdd_test_2labels_20percent.pkl")
    kdd_test_2labels_y = pd.read_pickle("dataset/kdd_test_2labels_y_20percent.pkl")
In [3]:
from sklearn.preprocessing import LabelEncoder
# Encode the class labels as integer codes: fit on the training labels and
# reuse the same fitted encoder on the test labels so both splits share one
# label -> integer mapping.
le_2labels = LabelEncoder()
dataset.y_train_2labels = le_2labels.fit_transform(dataset.kdd_train_2labels_y)
dataset.y_test_2labels = le_2labels.transform(dataset.kdd_test_2labels_y)
In [4]:
from itertools import product
from sklearn.model_selection import train_test_split


class preprocessing:
    """Namespace exposing the train/test feature matrices and label vectors."""

    # Drop the trailing two columns before taking .values — assumes those are
    # label/annotation columns, not features; TODO confirm against the pickles.
    x_train = dataset.kdd_train_2labels.iloc[:, :-2].values
    y_train = np.array(dataset.y_train_2labels)

    x_test = dataset.kdd_test_2labels.iloc[:, :-2].values
    y_test = np.array(dataset.y_test_2labels)
In [ ]:
from collections import namedtuple
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import regularizers
class Train:
    """Builds, trains and evaluates one Keras MLP per hyper-parameter combo.

    Results accumulate in the class-level `scores` / `models` lists across
    every call to `execute`.
    """

    # One result row per run; test_loss/test_score come from model.evaluate().
    score = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','test_loss', 'test_score'])
    # Keeps the fitted model together with the hyper-parameters that produced it.
    model_detail = namedtuple("model_detail", ['epoch', 'no_of_features','hidden_layers', 'model'])
    scores = []
    models = []

    @staticmethod
    def execute(x_train, x_test,
                y_train, y_test,
                input_dim, no_of_features, hidden_layers,
                epochs = 40, keep_prob = 0.4):
        """Train one binary classifier and record its test loss/accuracy.

        Parameters
        ----------
        x_train, x_test, y_train, y_test : array-likes of features / 0-1 labels.
        input_dim : int — number of input features.
        no_of_features : int — width of every hidden Dense layer.
        hidden_layers : int — total number of hidden layers (>= 1).
        epochs : int — training epochs.
        keep_prob : float — CAUTION: despite its name, this is passed to
            Dropout as the *drop* rate, so 0.4 drops 40% of units.

        Side effects: appends one entry each to Train.scores and Train.models.
        """
        print("Training for no_of_features: {}, hidden_layer: {}".format(no_of_features, hidden_layers
                                                                        ))
        model = Sequential()
        # First hidden layer carries the input_dim; the loop adds the rest.
        model.add(Dense(no_of_features, input_dim=input_dim, activation='relu'))
        model.add(Dropout(keep_prob))
        model.add(BatchNormalization())
        for _ in range(hidden_layers - 1):
            model.add(Dense(no_of_features, activation='relu'))
            model.add(Dropout(keep_prob))
            model.add(BatchNormalization())
        # Single sigmoid unit pairs with binary_crossentropy for 2-class output.
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer="Adam",
                      metrics=['accuracy'])

        # NOTE(review): test_size=.6 holds out 60% of the data for validation,
        # leaving only 40% to train on — confirm this split is intended.
        x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=.6)
        model.fit(x_train, y_train,
                  validation_data=(x_valid, y_valid),
                  epochs=epochs,
                  batch_size=128,
                  verbose = 0)

        curr_score = model.evaluate(x_test, y_test) #, batch_size=128)
        print("\n Loss: {}, Accuracy: {}".format(curr_score[0], curr_score[1]) )

        Train.scores.append(Train.score(epochs,no_of_features,hidden_layers,curr_score[0], curr_score[1]))
        Train.models.append(Train.model_detail(epochs,no_of_features,hidden_layers,model))
In [ ]:
# Grid of hidden-layer widths and depths to sweep over.
features_arr = [4, 8, 16, 32, 64, 128, 256, 1024]
hidden_layers_arr = [2, 4, 6, 50, 100]

# Derive the input dimension from the data instead of hard-coding 118, so the
# model always matches the feature matrix actually produced upstream.
input_dim = preprocessing.x_train.shape[1]

for f, h in product(features_arr, hidden_layers_arr):
    Train.execute(preprocessing.x_train, preprocessing.x_test,
                  preprocessing.y_train, preprocessing.y_test,
                  input_dim, f, h)
In [ ]:
# One row per training run: hyper-parameters plus test loss and accuracy.
pd.DataFrame(Train.scores)
In [ ]:
# Persist every trained model, with its hyper-parameters encoded in the path.
for detail in Train.models:
    model_path = "dataset/keras_model_epoch_{}_no_of_features_{}_hidden_layers_{}".format(
        detail.epoch, detail.no_of_features, detail.hidden_layers)
    detail.model.save(model_path)
In [ ]: