In [5]:
import struct, socket
import numpy as np
import linecache, bisect
import csv
import operator
import json
import os
try:
import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
from IPython.html import widgets
from IPython.display import display, Javascript, clear_output
# Pull the database name and the Impala daemon host out of the Duxbay configuration file.
with open('/etc/duxbay.conf') as conf:
    for line in conf:
        if "DBNAME=" in line:
            DBNAME = line.split("=")[1].strip('\n').replace("'", "")
        elif "IMPALA_DEM=" in line:
            IMPALA_DEM = line.split("=")[1].strip('\n').replace("'", "")

# The notebook's working directory determines every other location:
# its last path component is used as the date, and the 'ipynb' component
# is swapped for 'data' / 'context' to find the sibling folders.
spath = os.getcwd()
path = spath.split("/")
date = path[-1]
dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'
cpath = '/'.join(['context' if var == 'ipynb' else var for var in path][:-2]) + '/'

# Input and output files.
sconnect = dpath + 'flow_scores.csv'
threats_file = dpath + 'threats.csv'
iploc = cpath + 'iploc.csv'
nwloc = cpath + 'networkcontext_1.csv'

# Shared state filled in later by the widget callbacks.
anchor_ip = ''
ir_f = ''
threat_name = ''
iplist = ''
top_inbound_b = ''
top_results = 20

# iplist keeps the first column of iploc.csv as unsigned 32-bit integers;
# it stays '' when the file is missing, which disables the map features.
if not os.path.isfile(iploc):
    print("No iploc.csv file was found, Map View map won't be created")
else:
    iplist = np.loadtxt(
        iploc,
        dtype=np.uint32,
        delimiter=',',
        usecols={0},
        converters={0: lambda s: np.uint32(s.replace('"', ''))})
Function Definitions
In [6]:
# Render the "Expanded Search" box: a Select widget listing ip_list plus a Search button.
# ip_list: the options offered in the Select widget (the caller in this file passes a list of IP strings).
# Side effects: displays the widgets and registers search_ip as the button's click handler.
def display_controls(ip_list):
container = widgets.HBox(width=550, height=150)
h_container = widgets.Box(width=550, height=200)
label = widgets.HTML(value='<h2>Expanded Search</h2>')
# The list height grows by 18 per entry (plus 18) and is capped at 150.
ip_select = widgets.Select(options=ip_list,height=min(len(ip_list)*18+18,150),width=200)
search_button = widgets.Button(description='Search')
container.children = [ip_select,search_button]
h_container.children = [label,container]
display(h_container)
# Click handler for the Search button; the argument b is not used.
# It updates the module-level globals anchor_ip, ir_f and top_inbound_b.
def search_ip(b):
global anchor_ip
global top_inbound_b
anchor_ip = ip_select.value
if anchor_ip != "":
clear_output()
removeWidget(1)
print "Searching for ip: " + anchor_ip
global ir_f
# Tab-separated result file for the selected IP.
ir_f = dpath + "ir-" + anchor_ip + ".tsv"
# The Impala query is only built when the result file is missing or empty.
if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):
imp_query = (" \"SELECT min(treceived) as firstSeen, max(treceived) as lastSeen, sip as srcIP, dip as dstIP, " +
"sport as SPort, dport AS Dport, count(sip) as conns, max(ipkt) as maxPkts, avg(ipkt) " +
"as avgPkts, max(ibyt) as maxBytes, avg(ibyt) as avgBytes FROM "+DBNAME+".flow WHERE " +
"y="+ date[0:4] +" AND m="+ date[4:6] +" AND d="+ date[6:] +" " +
" AND (sip =\'" + anchor_ip + "\' OR dip=\'" + anchor_ip + "\') GROUP BY sip, dip,sport,dport\" ")
# IPython shell escape: runs impala-shell with the query and writes its output to ir_f.
# NOTE(review): anchor_ip is interpolated into both the SQL text and the shell command line without escaping.
!impala-shell -i $IMPALA_DEM --print_header -B --output_delimiter='\t' -q $imp_query -o $ir_f
clear_output()
print "\n Looking for additional details..."
# These helpers fill the globals inbound, outbound and twoway and then annotate them.
get_in_out_and_twoway_conns()
add_geospatial_info()
add_network_context()
print anchor_ip + ": connected to " + str(len(inbound.keys())) + " IPs"
# Keep the union of the top-by-bytes and top-by-connection-count inbound entries.
top_inbound_b = get_top_bytes(inbound,top_results)
top_inbound_conns = get_top_conns(inbound,top_results)
top_inbound_b.update(top_inbound_conns) # merge the two dictionaries
display_threat_box(anchor_ip)
search_button.on_click(search_ip)
def display_threat_box(ip):
    """Show the threat-summary form: a title field, a summary text area and a Save button.

    ip -- the IP under investigation. The only caller in this file passes the
          global ``anchor_ip``, which is what the header and the Save handler use.

    Pressing Save generates the story-board files (map, stats, dendrogram,
    timeline) for ``anchor_ip`` and records the threat in the threats file.
    """
    container_summary = widgets.HBox(width=550, height=150)
    separator = widgets.Box(width=550, height=15)
    main_container = widgets.Box(width=550, height=200)
    threat_header = widgets.HTML(value='<h4>Threat summary for ' + anchor_ip + '</h4>')
    threat_title_box = widgets.Text(value='', width=200, placeholder='Threat Title')
    threat_summary_box = widgets.Textarea(value='', width=300, height=100)
    save_button = widgets.Button(description='Save')
    container_summary.children = [threat_summary_box, save_button]
    main_container.children = [threat_header, threat_title_box, separator, container_summary]
    display(main_container)

    def save_threat_summary(b):
        """Click handler for the Save button; ``b`` is not used."""
        global threat_name
        global top_inbound_b
        clear_output()
        removeWidget(1)
        # threat_name used to be declared global but never assigned, so the
        # stats file was always written with an empty root name. Take it from
        # the title the analyst typed.
        threat_name = threat_title_box.value
        print("Creating Story Board elements ...")
        generate_attack_map_file(anchor_ip, top_inbound_b, outbound, twoway)
        generate_stats(anchor_ip, top_inbound_b, outbound, twoway, threat_name)
        generate_dendro(anchor_ip, top_inbound_b, outbound, twoway, date)
        details_inbound(anchor_ip, top_inbound_b)
        # Newlines in the summary are escaped so the record stays on one line.
        add_threat(anchor_ip, threat_title_box.value, threat_summary_box.value.replace('\n', '\\n'))
        print("Story board successfully created for {0}".format(anchor_ip))

    save_button.on_click(save_threat_summary)
def details_inbound(ip, inbound):
if ip != "" and len(inbound) > 0:
if os.path.isfile(ir_f):
sbdet_f = dpath + "sbdet-" + ip + ".tsv"
if not os.path.isfile(sbdet_f) or (os.path.isfile(sbdet_f) and file_is_empty(sbdet_f)):
imp_query = "SELECT min(treceived) as tstart, max(treceived) as tend, sip as srcIP, "
+ "dip as dstIP, proto as Proto, sport as SPort, dport AS Dport,ipkt as "
+ "Pkts, ibyt as Bytes FROM "+DBNAME+".flow WHERE "
+ "y="+ date[0:4] +" AND m="+ date[4:6] +" AND d="+ date[6:]
+ " AND ((dip IN({0}) "
+ "AND sip ='{1}') OR "
+ "(sip IN({0}) "
+ "AND dip ='{1}')) GROUP BY sip, dip, proto, sport, dport, ipkt, ibyt SORT BY tstart"
ips = "'" + "','".join(inbound.keys()) + "'"
imp_query = imp_query.format(ips,ip)
!impala-shell -i $IMPALA_DEM --print_header -B --output_delimiter='\t' -q $imp_query -o $sbdet_f
print "Timeline successfully created"
else:
print "Timeline couldn't be created"
def generate_dendro(ip, inbound, outbound, twoway, date):
    """Write ``threat-dendro-<anchor_ip>.json`` describing the incident progression.

    The JSON root carries ``ip`` as its name and ``date`` as its time, with
    three children ('Inbound Only', 'Outbound Only', 'two way'). Under each
    child, every distinct network-context description (element 2 of an
    entry's 'nwloc') becomes a node whose 'impact' is the number of entries
    carrying it. Entries without a non-empty 'nwloc' are left out.
    """
    dendro_fpath = dpath + 'threat-dendro-' + anchor_ip + ".json"

    def _branch(title, conns):
        # Tally how many entries share each context description.
        tally = {}
        for peer in conns:
            details = conns[peer]
            if 'nwloc' in details and len(details['nwloc']) > 0:
                label = details['nwloc'][2]
                tally[label] = tally.get(label, 0) + 1
        return {
            'name': title,
            'children': [{'name': label, 'impact': tally[label]} for label in tally],
            'impact': 0
        }

    obj = {
        'name': ip,
        'children': [
            _branch('Inbound Only', inbound),
            _branch('Outbound Only', outbound),
            _branch('two way', twoway),
        ],
        'time': date
    }

    with open(dendro_fpath, 'w') as dendro_f:
        dendro_f.write(json.dumps(obj))
    print('Incident progression successfully created')
def generate_stats(ip, inbound, outbound, twoway, threat_name):
    """Write ``stats-<anchor_ip>.json`` with connection counts per network context.

    The JSON root is named ``threat_name`` and sized by the total number of
    entries. Each of its three children ('Inbound Only', 'Outbound Only',
    'two way') is sized by its own dict and lists one node per context name
    returned by get_ctx_name(). The context text handed to get_ctx_name() is
    the part of the 'nwloc' description before the first '.', or '' when an
    entry has no non-empty 'nwloc'.
    """
    stats_fpath = dpath + 'stats-' + anchor_ip + ".json"

    def _section(title, conns):
        # Count the entries that fall under each context name.
        tally = {}
        for peer in conns:
            details = conns[peer]
            if 'nwloc' in details and len(details['nwloc']) > 0:
                full_ctx = details['nwloc'][2].split('.')[0]
            else:
                full_ctx = ''
            label = get_ctx_name(full_ctx)
            tally[label] = tally.get(label, 0) + 1
        return {
            'name': title,
            'children': [{'name': label, 'size': tally[label]} for label in tally],
            'size': len(conns)
        }

    obj = {
        'name': threat_name,
        'children': [
            _section('Inbound Only', inbound),
            _section('Outbound Only', outbound),
            _section('two way', twoway),
        ],
        'size': len(inbound) + len(outbound) + len(twoway)
    }

    json_str = json.dumps(obj)
    with open(stats_fpath, 'w') as stats_f:
        stats_f.write(json_str)
    print('Stats file successfully created')
def get_ctx_name(full_context):
    """Map a network-context description to one of 'VPN', 'DMZ', 'Proxy' or 'FW'.

    The markers are tried in that order and the first one contained in
    ``full_context`` wins; 'DMZ' is returned when none of them is present.
    """
    for marker in ("VPN", "DMZ", "Proxy", "FW"):
        if marker in full_context:
            return marker
    return 'DMZ'
def display_expanded_search():
    """Offer every not-yet-reviewed IP from high-risk flows for expanded search.

    Reads the IPs already recorded in the threats file, then scans the scored
    flows file for rows whose severity column is '1' and collects their source
    and destination IPs, skipping the recorded ones. The search controls are
    shown when at least one IP remains; otherwise a message is printed.
    """
    external_ips = []      # keeps first-seen order for the Select widget
    listed = set()         # same content as external_ips, for O(1) membership
    c_ips = set()          # IPs that already have a threat record
    clear_output()

    if os.path.isfile(threats_file) and not file_is_empty(threats_file):
        with open(threats_file, 'r') as th:
            # add_threat() writes 'ip|title|summary' lines, so the file must
            # be read with '|' as delimiter; with ',' the whole line ended up
            # in row[0] and reviewed IPs were never filtered out.
            t_read = csv.reader(th, delimiter='|')
            next(t_read, None)  # skip the header line
            for row in t_read:
                if row and row[0] != '':
                    c_ips.add(row[0])

    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header line
        # Internal Netflows use case:
        # sev,tstart,srcIP,dstIP,sport,dport,proto,flag,ipkt,ibyt,lda_score,rank,srcIpInternal,destIpInternal,srcGeo,dstGeo,
        #  0 ,  1   ,  2  ,  3  ,  4  ,  5  ,  6  , 7  , 8  , 9  ,   10    , 11 ,     12      ,      13      ,  14  ,  15  ,
        # srcDomain,dstDomain,srcIP_rep,dstIP_rep
        #    16    ,   17    ,   18    ,   19
        for row in reader:
            if row and row[0] == '1':
                for candidate in (row[2], row[3]):
                    if candidate not in listed and candidate not in c_ips:
                        listed.add(candidate)
                        external_ips.append(candidate)

    if len(external_ips) > 0:
        display_controls(external_ips)
    else:
        print("There are not high risk connections.")
# calculate number of inbound only, two-way, and outbound only
# build dict of IP addresses
# firstSeen,lastSeen,srcIP, dstIP, sport,dport,conns, maxPkts, avgPkts,maxBytes, avgBytes
# Rebuild the module-level dicts inbound, outbound and twoway from the result file ir_f.
# Takes no arguments and returns nothing; the three globals are reset to {} on every call.
# The file is read as tab-separated text and the columns are used by position:
# row[2] source IP, row[3] destination IP, row[6] connection count, row[9] max bytes
# (this matches the column list in the comment above this function).
def get_in_out_and_twoway_conns():
global inbound
inbound = {}
global outbound
outbound = {}
global twoway
twoway = {}
# srcdict is keyed by source IP and dstdict by destination IP.
srcdict = {}
dstdict = {}
# NOTE(review): conns_dict is never used in this function.
conns_dict= {}
if os.path.isfile(ir_f):
with open(ir_f, 'r') as f:
reader = csv.reader(f,delimiter='\t')
reader.next() #skip headers
rowct = 0
for row in reader:
if row != []:
# A later row with the same source IP replaces the earlier entry, so only one row per IP is kept.
srcdict[row[2]] = {
# IPv4 address packed into an unsigned 32-bit integer (network byte order).
'ip_int': struct.unpack("!L", socket.inet_aton(row[2]))[0],
'dst_ip': row[3],
'dst_ip_int': struct.unpack("!L", socket.inet_aton(row[3]))[0],
'conns': int(row[6]),
'maxbytes': int(row[9])
}
# Same record seen from the destination side; again one entry per destination IP.
dstdict[row[3]] = {
'ip_int': struct.unpack("!L", socket.inet_aton(row[3]))[0],
'src_ip': row[2],
'src_ip_int': struct.unpack("!L", socket.inet_aton(row[2]))[0],
'conns': int(row[6]),
'maxbytes': int(row[9])
}
rowct +=1
if rowct > 0:
# IPs seen as both source and destination go to twoway; source-only IPs go to outbound.
for result in srcdict:
if result in dstdict:
twoway[result] = srcdict[result]
else:
outbound[result] = srcdict[result]
# Destination-only IPs go to inbound.
for result in dstdict:
if result not in srcdict:
inbound[result] = dstdict[result]
print "Input, Output & Two way connections file detected."
else:
print "Couldn't find any matching connections."
#=========== Adds GEO IP information to the outbound, inbound and twoway connections==============================#
def add_geospatial_info():
    """Attach IP-location rows to the global outbound, twoway and inbound entries.

    Does nothing while ``iplist`` is still '' (no iploc file was loaded).
    Otherwise each entry gets a 'geo' row for its own address; outbound
    entries also get 'geo_dst' and inbound entries 'geo_src' for the peer
    address. A row is the parsed CSV line of ``iploc`` whose line number is
    the bisect position of the integer address in ``iplist``.
    """
    def _geo_row(ip_int):
        # bisect gives the line number of the matching range in iploc.
        line_no = bisect.bisect(iplist, ip_int)
        text = linecache.getline(iploc, line_no).replace('\n', '')
        return next(csv.reader([text]))

    # get geospatial info, only when iplocation file is available
    if iplist != '':
        for addr in outbound:
            outbound[addr]['geo'] = _geo_row(outbound[addr]['ip_int'])
            outbound[addr]['geo_dst'] = _geo_row(outbound[addr]['dst_ip_int'])

        for addr in twoway:
            twoway[addr]['geo'] = _geo_row(twoway[addr]['ip_int'])

        for addr in inbound:
            inbound[addr]['geo'] = _geo_row(inbound[addr]['ip_int'])
            inbound[addr]['geo_src'] = _geo_row(inbound[addr]['src_ip_int'])
# TODO: find a way to combine the outbound and two-way timelines with the big-picture inbound-only view.
# Network context: resolve the start and end address of each configured range.
def add_network_context():
    """Tag every global outbound, twoway and inbound entry with its network context.

    Reads ``nwloc`` (CSV with a header line; column 0 is an address
    specification, column 1 a description) into ranges of the form
    [first_address_int, last_address_int, description]. Column 0 may be a
    subnet ('a.b.c.d/len'), a range ('a.b.c.d - e.f.g.h') or a single address.

    Each entry's 'nwloc' is set to the range whose specification equals the
    IP text, else to the first range containing the IP, else to ''. Nothing
    is changed when the ``nwloc`` file does not exist.
    """
    def _to_int(address):
        # IPv4 dotted quad -> unsigned 32-bit integer (network byte order).
        return struct.unpack("!L", socket.inet_aton(address))[0]

    def _context_for(address):
        if address in nwdict:
            return nwdict[address]
        as_int = _to_int(address)
        for spec in nwdict:
            if nwdict[spec][0] <= as_int <= nwdict[spec][1]:
                return nwdict[spec]
        return ''

    nwdict = {}
    if os.path.isfile(nwloc):
        with open(nwloc, 'r') as f:
            reader = csv.reader(f, delimiter=',')
            next(reader, None)  # skip the header line
            # address range, description
            for row in reader:
                if len(row) < 2:
                    # Blank or incomplete line: nothing to register.
                    continue
                spec, description = row[0], row[1]
                if '/' in spec:
                    # Range in subnet
                    iprange = spec.split('/')
                    first = _to_int(iprange[0])
                    nwdict[spec] = [first, first + 2 ** (32 - int(iprange[1])) - 1, description]
                elif '-' in spec:
                    # IP Range
                    iprange = spec.split('-')
                    nwdict[spec] = [_to_int(iprange[0].replace(" ", "")),
                                    _to_int(iprange[1].replace(" ", "")),
                                    description]
                else:
                    # Exact match
                    nwdict[spec] = [_to_int(spec), _to_int(spec), description]

        # Each dict is updated through itself. The original exact-match branch
        # for outbound wrote to inbound[...], whose keys never overlap with
        # outbound, and therefore raised KeyError.
        for conns in (outbound, twoway, inbound):
            for address in conns:
                conns[address]['nwloc'] = _context_for(address)
def generate_attack_map_file(ip, inbound, outbound, twoway):
    """Write ``globe-<anchor_ip>.json`` with the map points for the given connections.

    Only runs when ``iplist`` is not '' (an iploc file was loaded); otherwise
    a message is printed and no file is written.

    Two-way entries add one point to 'destips' (type 1). Outbound (type 3)
    and inbound (type 2) entries add the entry's own address to 'sourceips'
    and its peer address to 'destips'. Coordinates are read from elements 7
    and 6 of the geo row and the location label from element 8 of the entry's
    own geo row. A point whose coordinates are not numeric is skipped.
    """
    def _feature(location, address, kind, geo_row):
        return {
            'type': 'Feature',
            'properties': {
                'location': location,
                'ip': address,
                'type': kind
            },
            'geometry': {
                'type': 'Point',
                'coordinates': [float(geo_row[7]), float(geo_row[6])]
            }
        }

    if iplist != '':
        globe_fpath = dpath + 'globe-' + anchor_ip + ".json"
        globe_json = {}
        globe_json['type'] = "FeatureCollection"
        globe_json['sourceips'] = []
        globe_json['destips'] = []

        for address in twoway:
            try:
                own_geo = twoway[address]['geo']
                globe_json['destips'].append(_feature(own_geo[8], address, 1, own_geo))
            except ValueError:
                pass

        # Outbound and inbound entries differ only in the keys holding the
        # peer information and in the point type.
        directed = (
            (outbound, 'geo_dst', 'dst_ip', 3),
            (inbound, 'geo_src', 'src_ip', 2),
        )
        for conns, peer_geo_key, peer_ip_key, kind in directed:
            for address in conns:
                try:
                    own_geo = conns[address]['geo']
                    peer_geo = conns[address][peer_geo_key]
                    globe_json['sourceips'].append(
                        _feature(own_geo[8], address, kind, own_geo))
                    globe_json['destips'].append(
                        _feature(own_geo[8], conns[address][peer_ip_key], kind, peer_geo))
                except ValueError:
                    pass

        json_str = json.dumps(globe_json)
        with open(globe_fpath, 'w') as globe_f:
            globe_f.write(json_str)
        print("Geolocation map successfully created")
    else:
        print("The map can't be created without an iploc file")
def add_threat(ip, threat_title, threat_comment, threats_path=None):
    """Record a threat as an 'ip|title|summary' line in the threats file.

    ip, threat_title, threat_comment -- the three fields of the record.
    threats_path -- file to update; defaults to the module-level
                    ``threats_file`` when omitted.

    The file is created with the header 'ip|title|summary' when it is missing
    or empty. A record that is already present as a complete line is not
    added a second time.
    """
    target = threats_file if threats_path is None else threats_path
    header = 'ip|title|summary\n'
    entry = '{0}|{1}|{2}\n'.format(ip, threat_title, threat_comment)

    try:
        with open(target, 'r') as threat_f:
            content = threat_f.read()
    except IOError:
        # No readable file yet: start a new one.
        content = ''

    if content == '':
        # An existing but empty file needs the header too, because readers
        # skip the first line.
        content = header
    elif not content.endswith('\n'):
        content += '\n'

    # Only match whole lines, so '0.0.0.1|..' is not mistaken for the tail
    # of an existing '10.0.0.1|..' record.
    already_recorded = content.startswith(entry) or ('\n' + entry) in content
    if not already_recorded:
        content += entry

    with open(target, 'w') as threat_fw:
        threat_fw.write(content)
def get_top_bytes(conns_dict, top):
    """Return the ``top`` entries of ``conns_dict`` with the largest 'maxbytes'.

    Prints a heading followed by one 'key | maxbytes' line per selected
    entry, largest first, and returns the selection as a new dict.
    """
    print("Now looking at the top " + str(top) + " connections per bytes:")
    ranked = sorted(conns_dict.items(), key=lambda entry: entry[1]['maxbytes'], reverse=True)
    selected = ranked[:top]
    for peer, details in selected:
        print("{0} | {1}".format(peer, details["maxbytes"]))
    return dict(selected)
def get_top_conns(conns_dict, top):
    """Return the ``top`` entries of ``conns_dict`` with the largest 'conns'.

    Prints a heading followed by one 'key | conns' line per selected entry,
    largest first, and returns the selection as a new dict.
    """
    print("Now looking at the top " + str(top) + " connections per number of connections:")
    ranked = sorted(conns_dict.items(), key=lambda entry: entry[1]['conns'], reverse=True)
    selected = ranked[:top]
    for peer, details in selected:
        print("{0} | {1}".format(peer, details["conns"]))
    return dict(selected)
def file_is_empty(path):
    """Return True when the file at ``path`` has a size of zero bytes."""
    return os.path.getsize(path) == 0
def removeWidget(index):
    """Remove a widget box from the page by running a jQuery snippet in the browser.

    index -- value placed in the ':eq(...)' part of the selector, i.e. the
             position of the widget box among the matched elements.
    """
    selector = ".widget-area > .widget-subarea > .widget-box:eq({0})".format(index)
    js_command = "$('" + selector + "').remove();"
    display(Javascript(js_command))
In [7]:
# Start the workflow: list the high-risk IPs that still lack a threat record and show the search controls.
display_expanded_search()