In [1]:
%matplotlib inline
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from keras.layers import Input
from keras.layers.convolutional import Conv1D
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, LSTM, Embedding, Reshape, Flatten, concatenate
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.utils import np_utils, plot_model
from keras.preprocessing import text
from IPython.display import Image
from keras.initializers import glorot_normal
from keras.optimizers import SGD, Adam, Adagrad
import xgboost
from sklearn.metrics import roc_curve, auc
import os
In [2]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): only NumPy is seeded in this cell; other libraries are not seeded here.
np.random.seed(42)
In [3]:
# Read the training split from a CSV file in the working directory into a DataFrame.
trn_1 = pd.read_csv("awidtrainingalmostfull")
In [4]:
# Column-name groups used by the preprocessing cells.
#
# The visible cells only read `to_drop2` (columns removed from the frame) and
# `bools` (columns expanded into .is0/.is1/.is-1 indicator columns);
# `to_int16`, `to_drop` and `float_col` are not referenced by any visible cell.

to_int16 = [
    'radiotap.present.reserved', 'wlan.fc.type_subtype', 'wlan.fc.ds',
    'wlan_mgt.fixed.capabilities.cfpoll.ap',
    # was 'wlan_mgta.fixed.listen_ival'; every other name uses the 'wlan_mgt.' prefix
    'wlan_mgt.fixed.listen_ival',
    'wlan_mgt.fixed.status_code', 'wlan_mgt.fixed.timestamp',
    'wlan_mgt.fixed.aid', 'wlan_mgt.fixed.reason_code', 'wlan_mgt.fixed.auth_seq',
    'wlan_mgt.fixed.htact', 'wlan_mgt.fixed.chanwidth', 'wlan_mgt.tim.bmapctl.offset',
    'wlan_mgt.country_info.environment',
    'wlan_mgt.rsn.capabilities.ptksa_replay_counter',
    'wlan_mgt.rsn.capabilities.gtksa_replay_counter',
    'wlan.wep.iv', 'wlan.wep.icv', 'wlan.qos.ack',
]

to_drop = [
    'frame.interface_id', 'frame.dlt', 'wlan.ra', 'wlan.da', 'wlan.ta', 'wlan.sa',
    'wlan.bssid', 'wlan.ba.bm', 'wlan_mgt.fixed.current_ap',
    'wlan_mgt.ssid', 'wlan.tkip.extiv', 'wlan.ccmp.extiv', 'radiotap.dbm_antsignal',
    'wlan_mgt.fixed.sequence',
]

# Columns removed from both the training and the test frame.
# Each name is listed once; the original list repeated its last entry.
to_drop2 = [
    'frame.interface_id', 'frame.dlt', 'frame.offset_shift', 'frame.time_delta_displayed',
    'frame.cap_len', 'frame.marked', 'frame.ignored',
    'radiotap.version', 'radiotap.pad', 'radiotap.length',
    'radiotap.present.tsft', 'radiotap.present.flags', 'radiotap.present.rate',
    'radiotap.present.channel', 'radiotap.present.fhss', 'radiotap.present.dbm_antsignal',
    'radiotap.present.dbm_antnoise', 'radiotap.present.lock_quality',
    'radiotap.present.tx_attenuation', 'radiotap.present.db_tx_attenuation',
    'radiotap.present.dbm_tx_power', 'radiotap.present.antenna',
    'radiotap.present.db_antsignal', 'radiotap.present.db_antnoise',
    'radiotap.present.rxflags', 'radiotap.present.xchannel', 'radiotap.present.mcs',
    'radiotap.present.ampdu', 'radiotap.present.vht', 'radiotap.present.reserved',
    'radiotap.present.rtap_ns', 'radiotap.present.vendor_ns', 'radiotap.present.ext',
    'radiotap.flags.cfp', 'radiotap.flags.preamble', 'radiotap.flags.wep',
    'radiotap.flags.frag', 'radiotap.flags.fcs', 'radiotap.flags.datapad',
    'radiotap.flags.badfcs', 'radiotap.flags.shortgi',
    'radiotap.channel.freq', 'radiotap.channel.type.turbo', 'radiotap.channel.type.cck',
    'radiotap.channel.type.ofdm', 'radiotap.channel.type.2ghz', 'radiotap.channel.type.5ghz',
    'radiotap.channel.type.passive', 'radiotap.channel.type.dynamic',
    'radiotap.channel.type.gfsk', 'radiotap.channel.type.gsm',
    'radiotap.channel.type.sturbo', 'radiotap.channel.type.half',
    'radiotap.channel.type.quarter',
    'radiotap.dbm_antsignal', 'radiotap.antenna', 'radiotap.rxflags.badplcp',
    'wlan.fc.type_subtype', 'wlan.fc.version', 'wlan.fc.ds', 'wlan.fc.moredata',
    'wlan.fc.order',
    'wlan.ra', 'wlan.da', 'wlan.ta', 'wlan.sa', 'wlan.bssid',
    'wlan.bar.type', 'wlan.ba.control.ackpolicy', 'wlan.ba.control.multitid',
    'wlan.ba.control.cbitmap', 'wlan.bar.compressed.tidinfo', 'wlan.ba.bm',
    'wlan.fcs_good',
    'wlan_mgt.fixed.capabilities.ess', 'wlan_mgt.fixed.capabilities.ibss',
    'wlan_mgt.fixed.capabilities.cfpoll.ap', 'wlan_mgt.fixed.capabilities.agility',
    'wlan_mgt.fixed.capabilities.apsd',
    # this literal was split across two physical lines in the original (a syntax error)
    'wlan_mgt.fixed.capabilities.radio_measurement',
    'wlan_mgt.fixed.capabilities.dsss_ofdm', 'wlan_mgt.fixed.capabilities.del_blk_ack',
    'wlan_mgt.fixed.capabilities.imm_blk_ack',
    'wlan_mgt.fixed.listen_ival', 'wlan_mgt.fixed.current_ap', 'wlan_mgt.fixed.status_code',
    'wlan_mgt.fixed.timestamp', 'wlan_mgt.fixed.aid', 'wlan_mgt.fixed.reason_code',
    'wlan_mgt.fixed.auth_seq', 'wlan_mgt.fixed.category_code', 'wlan_mgt.fixed.htact',
    'wlan_mgt.fixed.chanwidth', 'wlan_mgt.fixed.fragment', 'wlan_mgt.fixed.sequence',
    'wlan_mgt.tagged.all', 'wlan_mgt.ssid', 'wlan_mgt.ds.current_channel',
    'wlan_mgt.tim.dtim_period', 'wlan_mgt.tim.bmapctl.multicast',
    'wlan_mgt.tim.bmapctl.offset', 'wlan_mgt.country_info.environment',
    'wlan_mgt.rsn.gcs.type', 'wlan_mgt.rsn.pcs.count', 'wlan_mgt.rsn.akms.count',
    'wlan_mgt.rsn.akms.type', 'wlan_mgt.rsn.capabilities.preauth',
    'wlan_mgt.rsn.capabilities.no_pairwise',
    'wlan_mgt.rsn.capabilities.ptksa_replay_counter',
    'wlan_mgt.rsn.capabilities.gtksa_replay_counter',
    'wlan_mgt.rsn.capabilities.mfpr', 'wlan_mgt.rsn.capabilities.mfpc',
    'wlan_mgt.rsn.capabilities.peerkey',
    'wlan_mgt.tcprep.trsmt_pow', 'wlan_mgt.tcprep.link_mrg',
    'wlan.wep.icv', 'wlan.tkip.extiv', 'wlan.ccmp.extiv',
    'wlan.qos.tid', 'wlan.qos.priority', 'wlan.qos.eosp', 'wlan.qos.ack',
    'wlan.qos.amsdupresent', 'wlan.qos.buf_state_indicated', 'wlan.qos.bit4',
    'wlan.qos.txop_dur_req',
]

float_col = ['frame.time_epoch', 'frame.time_delta', 'frame.time_delta_displayed', 'frame.time_relative']

# Columns expanded into three indicator columns each (value == 0, == 1, == -1).
# Each name is listed once; the original list repeated its last entry.
bools = [
    "wlan.fc.frag", "wlan.fc.retry", "wlan.fc.pwrmgt", "wlan.fc.protected", "wlan_mgt.fixed.capabilities.privacy",
    "wlan_mgt.fixed.capabilities.preamble", "wlan_mgt.fixed.capabilities.pbcc", "wlan_mgt.fixed.capabilities.spec_man",
    "wlan_mgt.fixed.capabilities.short_slot_time", "wlan.qos.buf_state_indicated.1", 'frame.marked', 'radiotap.present.tsft',
    "radiotap.present.flags", "radiotap.present.rate", "radiotap.present.channel", "radiotap.present.fhss", "radiotap.present.dbm_antsignal",
    "radiotap.present.dbm_antnoise", "radiotap.present.lock_quality", "radiotap.present.tx_attenuation",
    "radiotap.present.dbm_tx_power", "radiotap.present.db_antsignal", "radiotap.present.db_antnoise", "radiotap.present.rxflags",
    "radiotap.present.xchannel", "radiotap.present.mcs", "radiotap.present.ampdu", "radiotap.present.vht",
    "radiotap.present.rtap_ns", "radiotap.present.ext", "radiotap.flags.cfp", "radiotap.flags.preamble",
    "radiotap.flags.wep", "radiotap.flags.frag", "radiotap.flags.fcs", "radiotap.flags.datapad", "radiotap.flags.badfcs",
    "radiotap.flags.shortgi", "radiotap.channel.type.turbo", "radiotap.channel.type.cck", "radiotap.channel.type.ofdm",
    "radiotap.channel.type.2ghz", "radiotap.channel.type.5ghz", "radiotap.channel.type.passive",
    "radiotap.channel.type.dynamic", "radiotap.channel.type.gfsk", "radiotap.channel.type.gsm",
    "radiotap.channel.type.sturbo", "radiotap.channel.type.half", "radiotap.channel.type.quarter",
    "radiotap.rxflags.badplcp", "wlan.fc.moredata", "wlan.fc.order", "wlan.ba.control.ackpolicy", "wlan.ba.control.multitid",
    "wlan.ba.control.cbitmap", "wlan.fcs_good", "wlan_mgt.fixed.capabilities.ess", "wlan_mgt.fixed.capabilities.ibss",
    "wlan_mgt.fixed.capabilities.cfpoll.ap", "wlan_mgt.fixed.capabilities.agility", "wlan_mgt.fixed.capabilities.apsd",
    "wlan_mgt.fixed.capabilities.radio_measurement", "wlan_mgt.fixed.capabilities.dsss_ofdm", "wlan_mgt.fixed.capabilities.del_blk_ack",
    "wlan_mgt.fixed.capabilities.imm_blk_ack", "wlan_mgt.tim.bmapctl.multicast", "wlan_mgt.rsn.capabilities.preauth",
    "wlan_mgt.rsn.capabilities.no_pairwise", "wlan_mgt.rsn.capabilities.mfpr", "wlan_mgt.rsn.capabilities.mfpc",
    "wlan_mgt.rsn.capabilities.peerkey", "wlan.qos.eosp", "wlan.qos.amsdupresent", "wlan.qos.buf_state_indicated",
    "wlan.qos.bit4",
]
In [5]:
# Remove every column named in to_drop2 from the training frame (in place).
trn_1.drop(columns=to_drop2, inplace=True)
In [6]:
# Substitute -1 for every cell whose value is exactly the string "?".
trn_1 = trn_1.replace(to_replace="?", value=-1)
In [7]:
# Convert every feature column to float; the "class" label column is left untouched.
for a in trn_1.columns:
    if a == "class":
        continue
    try:
        trn_1[a] = trn_1[a].apply(float)
    except (ValueError, TypeError):
        # float() rejected at least one value in this column, so re-parse the
        # whole column as base-16 integer text. Only conversion errors are
        # caught: a bare `except:` would also swallow KeyboardInterrupt and
        # unrelated failures.
        trn_1[a] = trn_1[a].apply(lambda x: float(int(str(x), base=16)))
In [8]:
# For each listed column that is present, add three 0/1 indicator columns
# marking where the value equals 0, 1 and -1 respectively.
for a in bools:
    if a not in trn_1.columns:
        continue
    for suffix, flag in ((".is0", 0), (".is1", 1), (".is-1", -1)):
        trn_1[a + suffix] = trn_1[a].apply(lambda x, flag=flag: 1 if x == flag else 0)
In [9]:
# Drop the source columns of the indicator expansion; names that are absent are skipped.
trn_1.drop(columns=bools, inplace=True, errors='ignore')
In [10]:
def encode(a, t=0):
    """Translate a class-label string into its one-hot vector or its integer index.

    Parameters
    ----------
    a : str
        One of the 17 known label names ("normal", "arp", ..., "disassociation").
    t : int, optional
        Selects the representation: 0 (default) gives the 17-element one-hot
        list, 1 gives the integer position of the label.

    Returns
    -------
    list of int or int
        A freshly built one-hot list when ``t`` is 0, the label index when ``t`` is 1.

    Raises
    ------
    KeyError
        If ``a`` is not one of the known labels.
    IndexError
        If ``t`` does not address one of the two representations.
    """
    labels = (
        "normal",
        "arp",
        "cafe_latte",
        "amok",
        "deauthentication",
        "authentication_request",
        "evil_twin",
        "beacon",
        "probe_response",
        "fragmentation",
        "probe_request",
        "chop_chop",
        "rts",
        "cts",
        "hirte",
        "power_saving",
        "disassociation",
    )
    # Look the label up first so an unknown name fails before `t` is examined.
    position = {label: slot for slot, label in enumerate(labels)}[a]
    one_hot = [1 if slot == position else 0 for slot in range(len(labels))]
    return (one_hot, position)[t]
In [11]:
# Attach the one-hot list of each row's label (encode's default t=0 selects the list form).
trn_1["class_1h"] = trn_1["class"].apply(encode)
In [12]:
# The textual label is no longer needed once class_1h exists.
trn_1.drop(columns=["class"], inplace=True)
In [13]:
# Write every column except the label column to X.npy as a plain value matrix.
np.save("X.npy", trn_1.loc[:, trn_1.columns != "class_1h"].values)
In [14]:
# Write the label column to Y.npy. Each cell of class_1h is a Python list
# produced by encode(x, 0), so the saved array presumably has object dtype — TODO confirm.
np.save("Y.npy", trn_1["class_1h"].values)
In [15]:
# Reload the feature matrix from disk and cast it to 32-bit floats.
X = np.load("X.npy").astype(np.float32)
In [16]:
# Reload the labels written by the np.save("Y.npy", ...) cell. That array holds
# one Python list per row (object dtype), which NumPy stores via pickle;
# NumPy >= 1.16.3 refuses to load such files unless allow_pickle=True is given.
# The file is produced by this notebook itself; do not point this at untrusted data.
Y = np.load("Y.npy", allow_pickle=True)
In [23]:
# Sweep a small XGBoost hyper-parameter grid: fit on (X, Y), persist each
# model, then report metrics and one ROC plot per class on the test split.
#
# NOTE(review): X_tst, Y_tst and class_list are assigned by the test-split
# preprocessing cell further down the notebook; on a fresh kernel that cell
# has to run before this one.
nest = [50,100,250]
maxde = [3,5,10]
learnrate = [0.1]
minchild = [10]
subsampl = [1]
from sklearn.externals import joblib

# Test-set targets do not change between grid points, so derive them once.
classes = np.array([list(map(int, i)) for i in np.array(Y_tst)])
y_true_idx = list(map(np.argmax, Y_tst))
lw = 2

for subs in subsampl:
    for minc in minchild:
        for lra in learnrate:
            for maxd in maxde:
                for nes in nest:
                    model = xgboost.XGBClassifier(n_estimators = nes, max_depth=maxd, min_child_weight=minc, subsample=subs, colsample_bytree=0.5, learning_rate=lra)
                    model.fit(X, Y, eval_metric='auc')
                    # The file name encodes only n_estimators and max_depth.
                    joblib.dump(model, "nest" + str(nes) + "maxde" + str(maxd) + "xgb.joblib.dat")

                    y_pred = model.predict(X_tst)
                    # The code below indexes every prediction as a vector (argmax, per-class columns).
                    classes_pred = np.array([list(map(int, i)) for i in y_pred])
                    y_pred_idx = list(map(np.argmax, y_pred))

                    # scikit-learn's signature is (y_true, y_pred); the original call
                    # passed them reversed, which swaps precision/recall and support.
                    print(classification_report(y_true_idx, y_pred_idx))
                    accuracy = accuracy_score(y_true_idx, y_pred_idx)
                    print(accuracy)

                    fpr = dict()
                    tpr = dict()
                    roc_auc = dict()
                    for i in range(17):
                        # NOTE(review): the score column is a hard 0/1 prediction, so each
                        # curve has a single operating point — confirm this is intended.
                        fpr[i], tpr[i], _ = roc_curve(classes[:,i], classes_pred[:,i] )
                        roc_auc[i] = auc(fpr[i], tpr[i])
                        plt.figure(figsize=(12, 9))
                        plt.plot(fpr[i], tpr[i], color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[i])
                        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
                        plt.xlim([0.0, 1.0])
                        plt.ylim([0.0, 1.05])
                        plt.xlabel('False Positive Rate')
                        plt.ylabel('True Positive Rate')
                        plt.title('ROC curve ' + class_list[i] + ' ' + str(model))
                        plt.legend(loc="lower right")
                        plt.show()
                        plt.close()
In [23]:
# Layer graph of the convolutional network (wrapped into `model1` in a later cell).
# The input is a vector of 48 values per sample.
inp = Input(shape=(48,))
# Batch-normalise the raw input, then apply ReLU to it.
norm = BatchNormalization()(inp)
act = Activation("relu")(norm)
# Reshape each sample to (1, 48) so it can be fed to Conv1D.
# NOTE(review): presumably Conv1D reads this as 1 step x 48 channels — TODO confirm.
rshp = Reshape(target_shape=(1,48))(act)
# Two parallel 35-filter convolutions over the same tensor, kernel sizes 3 and 5.
# Every initializer is seeded with 42.
conv1 = Conv1D(35, 3, strides=1, padding="same", activation="sigmoid", kernel_initializer=glorot_normal(42))(rshp)
conv2 = Conv1D(35, 5, strides=1, padding="same", activation="sigmoid", kernel_initializer=glorot_normal(42))(rshp)
# Join both branches along axis 1 and flatten for the dense layers.
conc = concatenate([conv1, conv2], axis=1)
flt = Flatten()(conc)
dense2 = Dense(70, activation="sigmoid", kernel_initializer=glorot_normal(42))(flt)
# 17 output units, matching the 17 labels defined in encode().
dense3 = Dense(17, activation="sigmoid", kernel_initializer=glorot_normal(42))(dense2)
In [56]:
# Layer graph of the small fully connected network (wrapped into `model2`
# in a later cell): 48 inputs -> BatchNorm -> ReLU -> 12 -> 18 -> 17.
inp2 = Input(shape=(48,))
norm = BatchNormalization()(inp2)
act = Activation("relu")(norm)
dense22 = Dense(12, activation="sigmoid")(act)
dense222 = Dense(18, activation="sigmoid")(dense22)
# The model is compiled with categorical_crossentropy and trained on one-hot
# targets (exactly one class per row), so the output layer must produce a
# probability distribution over the 17 classes: softmax, not per-unit sigmoid.
dense32 = Dense(17, activation="softmax")(dense222)
In [24]:
# Both names are re-imported here: BatchNormalization is already imported in
# the first cell, and load_model is not called in this cell.
from keras.models import load_model
from keras.layers.normalization import BatchNormalization
# Wrap the convolutional layer graph (inp -> dense3) into a Keras Model.
model1 = Model(inputs=inp, outputs=dense3)
In [35]:
# Render the architecture diagram of model1 to model1.png, the file shown by
# the Image(...) cell below. The original call referenced model2, which is
# not defined yet at this point of the notebook (NameError on Run All), and
# wrote model2.png, so nothing ever produced model1.png.
plot_model(model1, "model1.png", show_shapes=False, show_layer_names=False)
In [57]:
# Wrap the fully connected layer graph (inp2 -> dense32) into a Keras Model.
model2 = Model(inputs=inp2, outputs=dense32)
In [43]:
# Display the image file model1.png inline as the cell output.
Image("model1.png")
Out[43]:
In [58]:
# Configure model2 for training: Adagrad at learning rate 0.01,
# categorical cross-entropy loss, categorical accuracy as the tracked metric.
model2.compile(
    optimizer=Adagrad(lr=0.01),
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"],
)
In [41]:
# Show the dimensions of the loaded feature matrix.
X.shape
Out[41]:
In [26]:
# Show the loaded label array as the cell output.
Y
Out[26]:
In [29]:
# Shuffle the rows of the training frame (frac=1.0 keeps every row).
# NOTE(review): X and Y were already loaded from the .npy files in earlier
# cells, and the fit cell uses X and Y, so this reshuffle does not change
# the training input — confirm whether it is still needed.
trn_1 = trn_1.sample(frac=1.0)
In [59]:
# Train model2 on the full training matrix.
# The targets are handed over as one 2-D array (rows x 17). A nested Python
# list can be read by Keras as "one array per model output", which fails
# for this single-output model.
# validation_split=0.0001 holds out only 0.01% of the rows for validation.
history1 = model2.fit(
    X,
    np.array(Y.tolist()),
    batch_size=32,
    epochs=7,
    validation_split=0.0001,
    shuffle=True,
)
In [22]:
# Show the training frame as the cell output.
trn_1
Out[22]:
In [60]:
# Build the test matrices (X_tst, Y_tst) with the same steps that were
# applied to the training frame: drop columns, replace "?", coerce to float,
# expand indicator columns, one-hot encode the label.
tst_1 = pd.read_csv("1tst")
tst_1.drop(to_drop2, axis=1, inplace=True)
tst_1 = tst_1.replace("?", -1)

# Convert every feature column to float; "class" stays textual for encode().
for a in tst_1.columns:
    if a == "class":
        continue
    try:
        tst_1[a] = tst_1[a].apply(float)
    except (ValueError, TypeError):
        # float() rejected a value: re-parse the column as base-16 integer text.
        # Only conversion errors are caught; a bare `except:` would also
        # swallow KeyboardInterrupt and unrelated failures.
        tst_1[a] = tst_1[a].apply(lambda x: float(int(str(x), base=16)))

# Add 0/1 indicator columns (== 0, == 1, == -1) for each listed column that is present.
for a in bools:
    if a in tst_1.columns:
        tst_1[a+".is0"] = tst_1[a].apply(lambda x: 1 if x == 0 else 0)
        tst_1[a+".is1"] = tst_1[a].apply(lambda x: 1 if x == 1 else 0)
        tst_1[a+".is-1"] = tst_1[a].apply(lambda x: 1 if x == -1 else 0)
tst_1.drop(bools, axis=1, inplace=True, errors='ignore')

# Replace the textual label with its one-hot list.
tst_1["class_1h"] = tst_1["class"].apply(lambda x: encode(x, 0))
tst_1.drop(["class"], axis=1, inplace=True)

# Split into the feature matrix and the label array, then release the frame.
X_tst = tst_1[tst_1.columns[[a != "class_1h" for a in tst_1.columns]]].values
Y_tst = tst_1["class_1h"].values
del tst_1

# Label names in index order (same order as the table inside encode()).
class_list = ["normal","arp","cafe_latte","amok","deauthentication","authentication_request","evil_twin", "beacon","probe_response","fragmentation",
              "probe_request","chop_chop","rts","cts","hirte","power_saving","disassociation"]
In [61]:
# Evaluate model2 on the test split: classification report, accuracy and
# one ROC plot per class.
y_pred = model2.predict(X_tst)
# Predicted class index per row (position of the largest output).
y_classes = list(map(np.argmax, y_pred))
classes = np.array([list(map(int,i)) for i in np.array(Y_tst)])

# Index -> 17-element one-hot list, generated instead of typed out by hand.
A = {k: [1 if j == k else 0 for j in range(17)] for k in range(17)}
y_classes_list = [A[i] for i in y_classes]
classes_pred = np.array([list(map(int,i)) for i in y_classes_list])

# scikit-learn's signature is (y_true, y_pred); the original calls passed the
# predictions first, which swaps precision/recall and the support column.
y_true_idx = list(map(np.argmax, Y_tst))
print(classification_report(y_true_idx, y_classes))
accuracy = accuracy_score(y_true_idx, y_classes)
print(accuracy)

fpr = dict()
tpr = dict()
roc_auc = dict()
lw = 2
for i in range(17):
    # NOTE(review): classes_pred holds hard 0/1 predictions, so each curve has a
    # single operating point; y_pred[:, i] would give a score-based curve — confirm intent.
    fpr[i], tpr[i], _ = roc_curve(classes[:,i], classes_pred[:,i] )
    print(fpr[i], tpr[i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.figure(figsize=(12, 9))
    plt.plot(fpr[i], tpr[i], color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[i])
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve ' + class_list[i] + ' ' + str(model2))
    plt.legend(loc="lower right")
    plt.show()
    plt.close()
In [151]:
# Print the full list of predicted class indices (one entry per test row).
print(y_classes)
In [62]:
# load_model is imported again here but is not called in this cell.
from keras.models import load_model
# Write model1 to the file seq1.h5.
# NOTE(review): the visible cells compile and fit model2 only — confirm that
# model1 (not model2) is the model meant to be saved.
model1.save('seq1.h5')
In [ ]: