In [1]:
%matplotlib inline
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from keras.layers import Input
from keras.layers.convolutional import Conv1D
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, LSTM, Embedding, Reshape, Flatten, concatenate
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.utils import np_utils, plot_model
from keras.preprocessing import text
from IPython.display import Image
from keras.initializers import glorot_normal
from keras.optimizers import SGD, Adam, Adagrad
import xgboost
from sklearn.metrics import roc_curve, auc
import os
In [2]:
# Fix NumPy's global RNG state so that any NumPy-driven randomness further
# down the notebook is repeatable from a fresh kernel.
np.random.seed(seed=42)
In [3]:
# Load the raw training capture into a DataFrame. The path is relative to the
# notebook's working directory; the code below addresses columns by name
# (including a "class" label column), so the file must carry a header row.
trn_1 = pd.read_csv(filepath_or_buffer="awidtrainingalmostfull")
In [4]:
# Column names grouped under "to_int16" -- presumably the fields meant to be
# stored as 16-bit integers (name-based assumption; the list is not consumed by
# any cell visible in this notebook section).
# Fix: 'wlan_mgta.fixed.listen_ival' was a typo for 'wlan_mgt.fixed.listen_ival'
# (the spelling used by every other list in this cell).
to_int16 = [
    'radiotap.present.reserved',
    'wlan.fc.type_subtype',
    'wlan.fc.ds',
    'wlan_mgt.fixed.capabilities.cfpoll.ap',
    'wlan_mgt.fixed.listen_ival',
    'wlan_mgt.fixed.status_code',
    'wlan_mgt.fixed.timestamp',
    'wlan_mgt.fixed.aid',
    'wlan_mgt.fixed.reason_code',
    'wlan_mgt.fixed.auth_seq',
    'wlan_mgt.fixed.htact',
    'wlan_mgt.fixed.chanwidth',
    'wlan_mgt.tim.bmapctl.offset',
    'wlan_mgt.country_info.environment',
    'wlan_mgt.rsn.capabilities.ptksa_replay_counter',
    'wlan_mgt.rsn.capabilities.gtksa_replay_counter',
    'wlan.wep.iv',
    'wlan.wep.icv',
    'wlan.qos.ack',
]
# Short drop list: interface metadata, MAC-address style fields, SSID and a few
# protocol fields. Not referenced by the cells visible in this section (those
# use `to_drop2`).
to_drop = [
    'frame.interface_id',
    'frame.dlt',
    'wlan.ra',
    'wlan.da',
    'wlan.ta',
    'wlan.sa',
    'wlan.bssid',
    'wlan.ba.bm',
    'wlan_mgt.fixed.current_ap',
    'wlan_mgt.ssid',
    'wlan.tkip.extiv',
    'wlan.ccmp.extiv',
    'radiotap.dbm_antsignal',
    'wlan_mgt.fixed.sequence',
]
# Columns removed from the training frame before any feature conversion
# (consumed by the `trn_1.drop(to_drop2, ...)` cell).
# Fixes relative to the previous one-line literal:
#   * 'wlan_mgt.fixed.capabilities.radio_measurement' was split across a line
#     break inside the string, which is not a valid Python literal;
#   * the trailing duplicate 'wlan.qos.buf_state_indicated' was removed (the
#     set of dropped columns is unchanged).
to_drop2 = [
    # frame.*
    'frame.interface_id',
    'frame.dlt',
    'frame.offset_shift',
    'frame.time_delta_displayed',
    'frame.cap_len',
    'frame.marked',
    'frame.ignored',
    # radiotap header
    'radiotap.version',
    'radiotap.pad',
    'radiotap.length',
    'radiotap.present.tsft',
    'radiotap.present.flags',
    'radiotap.present.rate',
    'radiotap.present.channel',
    'radiotap.present.fhss',
    'radiotap.present.dbm_antsignal',
    'radiotap.present.dbm_antnoise',
    'radiotap.present.lock_quality',
    'radiotap.present.tx_attenuation',
    'radiotap.present.db_tx_attenuation',
    'radiotap.present.dbm_tx_power',
    'radiotap.present.antenna',
    'radiotap.present.db_antsignal',
    'radiotap.present.db_antnoise',
    'radiotap.present.rxflags',
    'radiotap.present.xchannel',
    'radiotap.present.mcs',
    'radiotap.present.ampdu',
    'radiotap.present.vht',
    'radiotap.present.reserved',
    'radiotap.present.rtap_ns',
    'radiotap.present.vendor_ns',
    'radiotap.present.ext',
    'radiotap.flags.cfp',
    'radiotap.flags.preamble',
    'radiotap.flags.wep',
    'radiotap.flags.frag',
    'radiotap.flags.fcs',
    'radiotap.flags.datapad',
    'radiotap.flags.badfcs',
    'radiotap.flags.shortgi',
    'radiotap.channel.freq',
    'radiotap.channel.type.turbo',
    'radiotap.channel.type.cck',
    'radiotap.channel.type.ofdm',
    'radiotap.channel.type.2ghz',
    'radiotap.channel.type.5ghz',
    'radiotap.channel.type.passive',
    'radiotap.channel.type.dynamic',
    'radiotap.channel.type.gfsk',
    'radiotap.channel.type.gsm',
    'radiotap.channel.type.sturbo',
    'radiotap.channel.type.half',
    'radiotap.channel.type.quarter',
    'radiotap.dbm_antsignal',
    'radiotap.antenna',
    'radiotap.rxflags.badplcp',
    # wlan frame control / addressing / block-ack
    'wlan.fc.type_subtype',
    'wlan.fc.version',
    'wlan.fc.ds',
    'wlan.fc.moredata',
    'wlan.fc.order',
    'wlan.ra',
    'wlan.da',
    'wlan.ta',
    'wlan.sa',
    'wlan.bssid',
    'wlan.bar.type',
    'wlan.ba.control.ackpolicy',
    'wlan.ba.control.multitid',
    'wlan.ba.control.cbitmap',
    'wlan.bar.compressed.tidinfo',
    'wlan.ba.bm',
    'wlan.fcs_good',
    # wlan_mgt fixed fields
    'wlan_mgt.fixed.capabilities.ess',
    'wlan_mgt.fixed.capabilities.ibss',
    'wlan_mgt.fixed.capabilities.cfpoll.ap',
    'wlan_mgt.fixed.capabilities.agility',
    'wlan_mgt.fixed.capabilities.apsd',
    'wlan_mgt.fixed.capabilities.radio_measurement',
    'wlan_mgt.fixed.capabilities.dsss_ofdm',
    'wlan_mgt.fixed.capabilities.del_blk_ack',
    'wlan_mgt.fixed.capabilities.imm_blk_ack',
    'wlan_mgt.fixed.listen_ival',
    'wlan_mgt.fixed.current_ap',
    'wlan_mgt.fixed.status_code',
    'wlan_mgt.fixed.timestamp',
    'wlan_mgt.fixed.aid',
    'wlan_mgt.fixed.reason_code',
    'wlan_mgt.fixed.auth_seq',
    'wlan_mgt.fixed.category_code',
    'wlan_mgt.fixed.htact',
    'wlan_mgt.fixed.chanwidth',
    'wlan_mgt.fixed.fragment',
    'wlan_mgt.fixed.sequence',
    # wlan_mgt tagged parameters
    'wlan_mgt.tagged.all',
    'wlan_mgt.ssid',
    'wlan_mgt.ds.current_channel',
    'wlan_mgt.tim.dtim_period',
    'wlan_mgt.tim.bmapctl.multicast',
    'wlan_mgt.tim.bmapctl.offset',
    'wlan_mgt.country_info.environment',
    'wlan_mgt.rsn.gcs.type',
    'wlan_mgt.rsn.pcs.count',
    'wlan_mgt.rsn.akms.count',
    'wlan_mgt.rsn.akms.type',
    'wlan_mgt.rsn.capabilities.preauth',
    'wlan_mgt.rsn.capabilities.no_pairwise',
    'wlan_mgt.rsn.capabilities.ptksa_replay_counter',
    'wlan_mgt.rsn.capabilities.gtksa_replay_counter',
    'wlan_mgt.rsn.capabilities.mfpr',
    'wlan_mgt.rsn.capabilities.mfpc',
    'wlan_mgt.rsn.capabilities.peerkey',
    'wlan_mgt.tcprep.trsmt_pow',
    'wlan_mgt.tcprep.link_mrg',
    # encryption / QoS
    'wlan.wep.icv',
    'wlan.tkip.extiv',
    'wlan.ccmp.extiv',
    'wlan.qos.tid',
    'wlan.qos.priority',
    'wlan.qos.eosp',
    'wlan.qos.ack',
    'wlan.qos.amsdupresent',
    'wlan.qos.buf_state_indicated',
    'wlan.qos.bit4',
    'wlan.qos.txop_dur_req',
]
# Frame timing columns, grouped under the name "float_col". Not referenced by
# the cells visible in this section.
float_col = [
    'frame.time_epoch',
    'frame.time_delta',
    'frame.time_delta_displayed',
    'frame.time_relative',
]
# Columns treated as tri-state flags: a later cell expands each one that is
# still present in the frame into ".is0" / ".is1" / ".is-1" indicator columns
# and then drops the original.
# Fix: "wlan.qos.buf_state_indicated" used to be listed twice, so its
# indicators were computed twice; every name now appears exactly once.
bools = [
    # wlan frame-control flags
    "wlan.fc.frag",
    "wlan.fc.retry",
    "wlan.fc.pwrmgt",
    "wlan.fc.protected",
    # management capabilities (first group)
    "wlan_mgt.fixed.capabilities.privacy",
    "wlan_mgt.fixed.capabilities.preamble",
    "wlan_mgt.fixed.capabilities.pbcc",
    "wlan_mgt.fixed.capabilities.spec_man",
    "wlan_mgt.fixed.capabilities.short_slot_time",
    "wlan.qos.buf_state_indicated.1",
    "frame.marked",
    # radiotap "present" bitmap
    "radiotap.present.tsft",
    "radiotap.present.flags",
    "radiotap.present.rate",
    "radiotap.present.channel",
    "radiotap.present.fhss",
    "radiotap.present.dbm_antsignal",
    "radiotap.present.dbm_antnoise",
    "radiotap.present.lock_quality",
    "radiotap.present.tx_attenuation",
    "radiotap.present.dbm_tx_power",
    "radiotap.present.db_antsignal",
    "radiotap.present.db_antnoise",
    "radiotap.present.rxflags",
    "radiotap.present.xchannel",
    "radiotap.present.mcs",
    "radiotap.present.ampdu",
    "radiotap.present.vht",
    "radiotap.present.rtap_ns",
    "radiotap.present.ext",
    # radiotap flags
    "radiotap.flags.cfp",
    "radiotap.flags.preamble",
    "radiotap.flags.wep",
    "radiotap.flags.frag",
    "radiotap.flags.fcs",
    "radiotap.flags.datapad",
    "radiotap.flags.badfcs",
    "radiotap.flags.shortgi",
    # radiotap channel type
    "radiotap.channel.type.turbo",
    "radiotap.channel.type.cck",
    "radiotap.channel.type.ofdm",
    "radiotap.channel.type.2ghz",
    "radiotap.channel.type.5ghz",
    "radiotap.channel.type.passive",
    "radiotap.channel.type.dynamic",
    "radiotap.channel.type.gfsk",
    "radiotap.channel.type.gsm",
    "radiotap.channel.type.sturbo",
    "radiotap.channel.type.half",
    "radiotap.channel.type.quarter",
    "radiotap.rxflags.badplcp",
    # remaining wlan flags
    "wlan.fc.moredata",
    "wlan.fc.order",
    "wlan.ba.control.ackpolicy",
    "wlan.ba.control.multitid",
    "wlan.ba.control.cbitmap",
    "wlan.fcs_good",
    # management capabilities (second group)
    "wlan_mgt.fixed.capabilities.ess",
    "wlan_mgt.fixed.capabilities.ibss",
    "wlan_mgt.fixed.capabilities.cfpoll.ap",
    "wlan_mgt.fixed.capabilities.agility",
    "wlan_mgt.fixed.capabilities.apsd",
    "wlan_mgt.fixed.capabilities.radio_measurement",
    "wlan_mgt.fixed.capabilities.dsss_ofdm",
    "wlan_mgt.fixed.capabilities.del_blk_ack",
    "wlan_mgt.fixed.capabilities.imm_blk_ack",
    "wlan_mgt.tim.bmapctl.multicast",
    # RSN capabilities
    "wlan_mgt.rsn.capabilities.preauth",
    "wlan_mgt.rsn.capabilities.no_pairwise",
    "wlan_mgt.rsn.capabilities.mfpr",
    "wlan_mgt.rsn.capabilities.mfpc",
    "wlan_mgt.rsn.capabilities.peerkey",
    # QoS
    "wlan.qos.eosp",
    "wlan.qos.amsdupresent",
    "wlan.qos.buf_state_indicated",
    "wlan.qos.bit4",
]
In [5]:
# Remove every column named in `to_drop2`. No `errors=` override is given, so
# a name missing from the frame raises KeyError (which also means this cell
# cannot be re-run on an already-trimmed frame).
trn_1 = trn_1.drop(labels=to_drop2, axis=1)
In [6]:
# Cells holding exactly "?" are replaced by the numeric sentinel -1, so that
# the float conversion below can process them.
trn_1 = trn_1.replace(to_replace="?", value=-1)
In [7]:
# Convert every feature column (everything except the "class" label) to float.
# A column is first tried as plain numbers; if any value in it cannot be read
# by float(), the WHOLE column is re-parsed as base-16 integers instead.
# Only conversion errors trigger the fallback: the previous bare `except:` also
# swallowed KeyboardInterrupt/SystemExit, and its `finally: pass` did nothing.
for a in trn_1.columns:
    if a == "class":
        continue
    try:
        trn_1[a] = trn_1[a].apply(float)
    except (ValueError, TypeError):
        # str() first so that non-string cells (e.g. the -1 placeholder) can
        # go through int(..., base=16) as well.
        trn_1[a] = trn_1[a].apply(lambda x: float(int(str(x), base=16)))
In [8]:
# Expand each flag column that survived the earlier drop into three 0/1
# indicator columns: "<name>.is0", "<name>.is1" and "<name>.is-1"
# (value equals 0, 1 or -1 respectively).
for col in bools:
    if col not in trn_1.columns:
        continue
    flag_values = trn_1[col]
    for state in (0, 1, -1):
        trn_1[col + ".is" + str(state)] = flag_values.eq(state).astype("int64")
In [9]:
# The raw flag columns are no longer needed once their indicators exist.
# errors='ignore' skips names in `bools` that are not in the frame.
trn_1 = trn_1.drop(labels=bools, axis=1, errors='ignore')
In [10]:
# Class labels in index order: position i is both the integer label and the
# hot position of the one-hot vector returned by encode().
_CLASS_NAMES = (
    "normal",
    "arp",
    "cafe_latte",
    "amok",
    "deauthentication",
    "authentication_request",
    "evil_twin",
    "beacon",
    "probe_response",
    "fragmentation",
    "probe_request",
    "chop_chop",
    "rts",
    "cts",
    "hirte",
    "power_saving",
    "disassociation",
)
# Built once instead of on every call (encode runs once per data row).
_CLASS_INDEX = {name: idx for idx, name in enumerate(_CLASS_NAMES)}


def encode(a, t=0):
    """Encode a class label as a one-hot vector or as its integer index.

    Parameters
    ----------
    a : str
        Class label; must be one of the 17 names in ``_CLASS_NAMES``.
    t : int, default 0
        Selector into the ``(one_hot, index)`` pair: ``0`` returns the one-hot
        list of length 17, ``1`` returns the integer index.

    Returns
    -------
    list of int or int
        A freshly allocated one-hot list (safe to mutate) when ``t == 0``,
        the class index when ``t == 1``.

    Raises
    ------
    KeyError
        If ``a`` is not a known class label.
    IndexError
        If ``t`` does not select an element of the two-item pair.
    """
    try:
        idx = _CLASS_INDEX[a]
    except KeyError:
        raise KeyError("unknown class label: %r" % (a,)) from None
    one_hot = [0] * len(_CLASS_NAMES)
    one_hot[idx] = 1
    return (one_hot, idx)[t]
In [11]:
# Attach the one-hot encoding of each row's label (encode with t=0 returns the
# 17-element list form).
trn_1["class_1h"] = trn_1["class"].apply(encode, t=0)
In [12]:
# The textual label is redundant once "class_1h" exists.
trn_1 = trn_1.drop(labels=["class"], axis=1)
In [13]:
# Persist the feature matrix: every column except the one-hot label column,
# in the frame's current column order.
feature_columns = [name for name in trn_1.columns if name != "class_1h"]
np.save("X.npy", trn_1[feature_columns].values)
In [14]:
# Persist the labels. Each cell of "class_1h" is a Python list, so this is
# written as an object array (one list per row), not as a 2-D numeric matrix.
labels_one_hot = trn_1["class_1h"].values
np.save("Y.npy", labels_one_hot)
In [15]:
# Reload the cached feature matrix and downcast it to 32-bit floats.
X = np.load("X.npy")
X = X.astype(np.float32)
In [16]:
# Reload the cached labels. Y.npy is written from a column of Python lists,
# i.e. as a pickled object array; NumPy >= 1.16.3 refuses to load such files
# unless allow_pickle=True is passed explicitly (older versions defaulted to
# True, so this is behaviour-preserving there).
# NOTE: unpickling executes arbitrary code -- only acceptable because this file
# is produced by this notebook itself; never point this at an untrusted file.
Y = np.load("Y.npy", allow_pickle=True)
In [23]:
# Hyper-parameter sweep: train one XGBoost classifier per grid point, persist
# it, then report test-set metrics and draw one ROC plot per class.
#
# NOTE(review): X_tst, Y_tst and class_list are not defined in any cell visible
# in this section (execution counts jump from 16 to 23) -- they must exist in
# the kernel before this cell runs; confirm where they are created.
nest = [50,100,250]
maxde = [3,5,10]
learnrate = [0.1]
minchild = [10]
subsampl = [1]

from itertools import product
try:
    # Standalone joblib; `sklearn.externals.joblib` was deprecated and later
    # removed from scikit-learn.
    import joblib
except ImportError:
    from sklearn.externals import joblib

lw = 2

# Same iteration order as the former five nested loops (subsample outermost,
# n_estimators innermost).
for subs, minc, lra, maxd, nes in product(subsampl, minchild, learnrate, maxde, nest):
    model = xgboost.XGBClassifier(
        n_estimators=nes,
        max_depth=maxd,
        min_child_weight=minc,
        subsample=subs,
        colsample_bytree=0.5,
        learning_rate=lra,
    )
    model.fit(X, Y, eval_metric='auc')
    # The file name only encodes n_estimators and max_depth; if the other grid
    # lists ever hold more than one value, later models overwrite earlier ones.
    joblib.dump(model, "nest" + str(nes) + "maxde" + str(maxd) + "xgb.joblib.dat")

    y_pred = model.predict(X_tst)
    classes = np.array([list(map(int,i)) for i in np.array(Y_tst)])
    classes_pred = np.array([list(map(int,i)) for i in y_pred])

    # scikit-learn metrics take (y_true, y_pred); the arguments used to be
    # passed the other way round, which swaps precision and recall in the
    # report (accuracy itself is symmetric).
    true_labels = list(map(np.argmax, Y_tst))
    pred_labels = list(map(np.argmax, y_pred))
    print(classification_report(true_labels, pred_labels))
    accuracy = accuracy_score(true_labels, pred_labels)
    print(accuracy)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    # 17 = width of the one-hot vectors produced by encode().
    # The curves are built from the hard 0/1 predictions above, not from
    # predicted probabilities.
    for i in range(17):
        fpr[i], tpr[i], _ = roc_curve(classes[:,i], classes_pred[:,i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        plt.figure(figsize=(12, 9))
        plt.plot(fpr[i], tpr[i], color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[i])
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC curve ' + class_list[i] + ' ' + str(model))
        plt.legend(loc="lower right")
        plt.show()
        # Release the figure so a long sweep does not accumulate open figures.
        plt.close()