Initialize workflow.
In [ ]:
import struct, socket
import shutil
import numpy as np
import pandas as pd
import linecache, bisect
import csv
import operator
import os, time, subprocess
try:
import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
from IPython.html import widgets
from IPython.display import display, Javascript, clear_output
# The notebook is expected to run from .../ipynb/<dsource>/<date>; the data and
# context directories mirror that path with 'ipynb' swapped out.
path = os.getcwd().split("/")
date = path[-1]
dsource = path[len(path) - 2]

# Same directory as the notebook, but under 'data' instead of 'ipynb'.
dpath = '/'.join('data' if part == 'ipynb' else part for part in path) + '/'
# Two levels up from the notebook directory, under 'context' instead of 'ipynb'.
cpath = '/'.join(
    'context' if part == 'ipynb' else part for part in path[:len(path) - 2]
) + '/'

# Scored-connections file plus its backup, feedback and scratch companions.
sconnect = dpath + 'flow_scores.csv'
sconnectbu = dpath + 'flow_scores_bu.csv'
score_fbk = dpath + 'flow_scores_fb.csv'
tmpconnect = sconnect + '.tmp'
stemp = sconnect + '.new'

# Maximum number of data rows scanned when building the selection lists.
coff = 250
nwloc = cpath + 'networkcontext.csv'

# Module-level lookup tables and lists for source/destination IPs and ports.
srcdict = {}
srclist = []
dstdict = {}
dstlist = []
sportdict = {}
sportlist = []
dportdict = {}
dportlist = []
In [ ]:
def apply_css_to_select(select):
    """Assign the form's sizing and overflow CSS rules to ``select._css``.

    Each rule is a ``(selector, property, value)`` triple. Two rules use
    ``None`` as the selector and three use ``'select'``.
    (Presumably ``None`` addresses the widget's own element -- confirm
    against the ipywidgets version in use.)
    """
    widget_rules = (
        (None, 'height', '90%'),
        (None, 'width', '90%'),
    )
    select_rules = (
        ('select', 'overflow-x', 'auto'),
        ('select', 'width', '100%'),
        ('select', 'margin', 0),
    )
    select._css = widget_rules + select_rules
# Load the LDA scores and update the LDA doc with scores - either one edge,
# or ALL edges that contain x IP and y port.
# Header row shared by the scored-connections CSV and the feedback file.
_FLOW_SCORES_HEADER = [
    "sev", "tstart", "srcIP", "dstIP", "sport", "dport", "proto", "flag",
    "ipkt", "ibyt", "lda_score", "rank", "srcIpInternal", "destIpInternal",
    "srcGeo", "dstGeo", "srcDomain", "dstDomain", "srcIP_rep", "dstIP_rep",
]

# Placeholder shown as the first entry of every selection list.
_NO_SELECTION = '- Select -'


def _active_criteria(values, columns):
    """Pair every chosen value with the CSV column it has to equal.

    Entries that are empty or still on the placeholder are dropped.
    """
    return [(col, val) for col, val in zip(columns, values)
            if val != '' and val != _NO_SELECTION]


def _row_matches(row, criteria):
    """Return True when ``criteria`` is non-empty and the row satisfies all of it."""
    return bool(criteria) and all(
        len(row) > col and row[col] == val for col, val in criteria)


def displaythis():
    """Build and display the connection-scoring form.

    Clears any widgets already rendered, re-applies the automatic rules via
    ``set_rules()``, then fills four selection lists (source IP, destination
    IP, source port, destination port) from ``sconnect``. Only the first
    ``coff`` data rows are scanned and only rows whose severity column is
    ``'0'`` contribute values.

    The "Score" button rewrites ``sconnect`` with the chosen rating applied to
    every matching row and appends those rows to ``score_fbk``. The "Save"
    button sorts ``sconnect``, rebuilds the form and calls ``ml_feedback()``.
    """
    display(Javascript("$('.widget-area > .widget-subarea > *').remove();"))

    # One option list and one "already seen" set per selectable column
    # (CSV columns 2..5: srcIP, dstIP, sport, dport).
    choices = [[_NO_SELECTION] for _ in range(4)]
    seen = [set() for _ in range(4)]

    set_rules()
    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header; tolerate an empty file
        for rowct, row in enumerate(reader, 1):
            if rowct > coff:
                break
            # Blank/short rows and already-scored rows offer nothing to select.
            if len(row) < 6 or row[0] != '0':
                continue
            for options, known, value in zip(choices, seen, row[2:6]):
                if value not in known:
                    known.add(value)
                    options.append(value)
    srclist, dstlist, sportlist, dportlist = choices

    def labelled_select(title, options, box_width):
        """Create a titled Select widget wrapped in a Box; return (select, box)."""
        label = widgets.HTML(value=title, height='10%', width='100%')
        select = widgets.Select(options=options, height='90%')
        apply_css_to_select(select)
        box = widgets.Box(width=box_width, height='100%')
        box.children = (label, select)
        return select, box

    srcselect, srcIpBox = labelled_select("Source IP:", srclist, '25%')
    dstselect, dstIpBox = labelled_select("Dest IP:", dstlist, '25%')
    sportselect, srcPortBox = labelled_select("Src Port:", sportlist, '20%')
    dportselect, dstPortBox = labelled_select("Dst Port:", dportlist, '20%')

    # Quick Search and Actions Box
    emptyLabel = widgets.HTML(value=" ")
    srctext = widgets.Text(value='', width='100%', placeholder='Quick IP scoring')
    srctext._css = (
        (None, 'width', '100%'),
    )
    ratingbut = widgets.RadioButtons(description='Rating:', options=['1', '2', '3'], width='100%')
    assignbut = widgets.Button(description='Score', width='45%')
    assignbut.button_style = 'primary'
    updatebut = widgets.Button(description='Save', width='45%')
    updatebut.button_style = 'primary'
    updatebut._css = (
        (None, 'margin-left', '10%'),
    )
    actionsBox = widgets.Box(width='20%', height='100%')
    actionsBox.children = (emptyLabel, srctext, ratingbut, assignbut, updatebut)

    # Container Box
    bigBox = widgets.HBox(width='90%', height=250)
    bigBox.children = (srcIpBox, dstIpBox, srcPortBox, dstPortBox, actionsBox)
    display(bigBox)

    def update_sconnects(b):
        """Apply the selected rating to every connection matching the form.

        With a quick-search IP entered, a row matches when that IP is its
        source (together with the selected destination and ports) or its
        destination (together with the selected source and ports). Otherwise
        the four selections are used directly. ``b`` is the clicked button
        and is not used.
        """
        clear_output()
        time.sleep(.25)

        quick_ip = srctext.value
        ports = [sportselect.value, dportselect.value]
        if quick_ip != '':
            svals = [quick_ip, dstselect.value] + ports
            dvals = [srcselect.value, quick_ip] + ports
        else:
            svals = [srcselect.value, dstselect.value] + ports
            dvals = []
        risk = ratingbut.value

        # CSV columns compared against the four inputs: srcIP, dstIP, sport, dport.
        fhash = [2, 3, 4, 5]
        src_criteria = _active_criteria(svals, fhash)
        dst_criteria = _active_criteria(dvals, fhash)

        scored_threats = []
        if src_criteria or dst_criteria:
            with open(sconnect, 'r') as f, open(tmpconnect, 'w') as g:
                reader = csv.reader(f, delimiter=',')
                riter = csv.writer(g, delimiter=',')
                next(reader, None)
                riter.writerow(_FLOW_SCORES_HEADER)
                for row in reader:
                    # A row satisfying both criteria sets is scored once only,
                    # so it is neither double-counted nor duplicated in the
                    # feedback file.
                    if _row_matches(row, src_criteria) or _row_matches(row, dst_criteria):
                        row[0] = risk
                        scored_threats.append(row)
                    riter.writerow(row)

            # Append the scored rows to the tab-separated feedback file,
            # writing the header first when the file is being created.
            write_header = not os.path.exists(score_fbk)
            with open(score_fbk, 'a') as feedback:
                wr = csv.writer(feedback, delimiter='\t', quoting=csv.QUOTE_NONE)
                if write_header:
                    wr.writerow(_FLOW_SCORES_HEADER)
                wr.writerows(scored_threats)

            # Both files are closed (and flushed) at this point.
            shutil.copyfile(tmpconnect, sconnect)

        print("{0} matching connections scored".format(len(scored_threats)))

    def savesort(b):
        """Sort ``sconnect`` by (severity, rank), then rebuild the form.

        Column 0 is compared as an int and column 11 as a float. After the
        sorted file replaces ``sconnect`` the parent page is asked to reload,
        the form is recreated and ``ml_feedback()`` is called. ``b`` is the
        clicked button and is not used.
        """
        # Clear last output
        clear_output()

        # Read everything and close the input before it gets overwritten.
        with open(sconnect, 'r') as f:
            reader = csv.reader(f, delimiter=",")
            next(reader, None)
            srtlist = sorted(reader, key=lambda x: (int(x[0]), float(x[11])))

        with open(stemp, 'w') as g:
            riter = csv.writer(g, delimiter=',')
            riter.writerow(_FLOW_SCORES_HEADER)
            riter.writerows(srtlist)
        shutil.copyfile(stemp, sconnect)

        print("Suspicious connects successfully updated")
        display(Javascript('reloadParentData();'))
        # Close widgets form
        bigBox.close()
        # Rebuild widgets form
        displaythis()
        ml_feedback()

    assignbut.on_click(update_sconnects)
    updatebut.on_click(savesort)
def set_rules():
    """Apply the built-in scoring rules, each assigning risk 2 to its matches.

    Every rule is a pair of six-slot lists handed to ``apply_rules``: the
    comparison operator per slot and the threshold per slot, where ``''``
    leaves the slot unused. ``apply_rules`` maps the slots to CSV columns
    2, 3, 4, 5, 8 and 9 (srcIP, dstIP, sport, dport, ipkt, ibyt in the file
    header).
    """
    risk = 2
    rules = [
        # dport <= 1024 and ibyt <= 54
        (['leq', 'leq', 'leq', 'leq', 'leq', 'leq'], ['', '', '', 1024, '', 54]),
        # dport <= 1024, ipkt == 3 and ibyt == 152
        (['leq', 'leq', 'leq', 'leq', 'eq', 'eq'], ['', '', '', 1024, 3, 152]),
        # dport <= 1024, ipkt == 2 and ibyt == 104
        # (this rule used to be defined but never handed to apply_rules)
        (['leq', 'leq', 'leq', 'leq', 'eq', 'eq'], ['', '', '', 1024, 2, 104]),
        # sport == 0 and dport <= 1023
        (['leq', 'leq', 'eq', 'leq', 'leq', 'leq'], ['', '', 0, 1023, '', '']),
    ]
    for rops, rvals in rules:
        apply_rules(rops, rvals, risk)
def apply_rules(rops, rvals, risk):
    """Rewrite ``sconnect``, setting column 0 to ``risk`` on every matching row.

    Args:
        rops: per-slot comparison names; ``'leq'`` (field <= threshold) and
            ``'eq'`` (field == threshold) are recognised. A slot with any
            other name can never match.
        rvals: per-slot integer thresholds; ``''`` disables the slot.
        risk: value written into column 0 of each matching row.

    Slots 0-5 address CSV columns 2, 3, 4, 5, 8 and 9; slots beyond the sixth
    are ignored. A row matches when every enabled slot's comparison holds; a
    rule with no enabled slots therefore matches every row. Fields that are
    missing or not integers make the row a non-match instead of aborting the
    rewrite. The result is written to ``tmpconnect`` and then copied over
    ``sconnect``.
    """
    # CSV columns addressed by rule slots 0-5.
    rfhash = [2, 3, 4, 5, 8, 9]
    comparators = {'leq': operator.le, 'eq': operator.eq}

    # Resolve the enabled slots once instead of once per row.
    checks = [(col, comparators.get(op), int(val))
              for col, op, val in zip(rfhash, rops, rvals) if val != '']

    def matches(row):
        """Return True when the row satisfies every enabled slot."""
        for col, compare, threshold in checks:
            if compare is None:
                return False
            try:
                field = int(row[col])
            except (IndexError, ValueError):
                return False
            if not compare(field, threshold):
                return False
        return True

    with open(sconnect, 'r') as f, open(tmpconnect, 'w') as g:
        reader = csv.reader(f, delimiter=',')
        riter = csv.writer(g, delimiter=',')
        next(reader, None)  # skip the header; tolerate an empty file
        riter.writerow(
            ["sev", "tstart", "srcIP", "dstIP", "sport", "dport", "proto",
             "flag", "ipkt", "ibyt", "lda_score", "rank", "srcIpInternal",
             "destIpInternal", "srcGeo", "dstGeo", "srcDomain", "dstDomain",
             "srcIP_rep", "dstIP_rep"])
        for row in reader:
            # Blank rows have no severity column to update.
            if row and matches(row):
                row[0] = risk
            riter.writerow(row)

    # The temp file is closed (and flushed) before it replaces the original.
    shutil.copyfile(tmpconnect, sconnect)
def attack_heuristics():
    """Print every source IP in ``sconnect`` that has more than 20 connections.

    The module-level ``srcdict``, ``dstdict``, ``sportdict`` and ``dportdict``
    are first filled with the distinct values of CSV columns 2-5 (each value
    maps to itself). The per-source row counts are then taken from a pandas
    read of the same file, using its ``srcIP`` column.
    """
    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header; tolerate an empty file
        for row in reader:
            if len(row) < 6:
                continue  # blank or truncated line
            for table, value in zip((srcdict, dstdict, sportdict, dportdict), row[2:6]):
                table.setdefault(value, value)

    df = pd.read_csv(sconnect)
    # One pass over the column instead of two get_group() calls per address.
    connects = df[u'srcIP'].value_counts()
    for srcip in srcdict:
        count = connects.get(srcip)
        if count is None:
            # pandas did not produce this key, e.g. it parsed the column differently.
            print("Key Error for ip: " + srcip)
        elif count > 20:
            print("{0} connects: {1}".format(srcip, count))
def ml_feedback():
    """Copy the feedback file ``score_fbk`` to the ML node with ``scp``.

    The shell command sets DSOURCE and FDATE, sources ``/etc/duxbay.conf``,
    and derives the remote user, host and directory from the LUSER, MLNODE
    and LPATH variables (presumably defined by that configuration file --
    confirm). The remote file name is the base name of ``sconnect``. A
    message is printed when the command exits with a non-zero status.
    """
    # Function-scope import so the block works on both Python 2 and 3.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    dst_name = os.path.basename(sconnect)
    # Values derived from the working directory are quoted so that spaces or
    # shell metacharacters in a path cannot alter the command.
    str_fb = " && ".join([
        "DSOURCE={0}".format(quote(dsource)),
        "FDATE={0}".format(quote(date)),
        "source /etc/duxbay.conf",
        "usr=$(echo $LUSER | cut -f3 -d'/')",
        "mlnode=$MLNODE",
        "lpath=$LPATH",
        "scp {0} $usr@$mlnode:$lpath/{1}".format(quote(score_fbk), quote(dst_name)),
    ])
    status = subprocess.call(str_fb, shell=True)
    if status != 0:
        print("Feedback upload failed (exit status {0})".format(status))
Display the scoring form. The rule, attack-heuristics and restore-backup cells below are optional and commented out.
In [ ]:
# Build and show the scoring form (this also re-applies the rules via set_rules()).
displaythis()
In [ ]:
# set_rules()
In [ ]:
# attack_heuristics()
In [ ]:
# !cp $sconnectbu $sconnect