In [ ]:
import struct, socket
import csv, json
import os
import datetime
import operator
import itertools
import md5
from collections import defaultdict
try:
import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
from IPython.html import widgets
from IPython.display import display, HTML, clear_output, Javascript
# Read the two settings this notebook needs from the shell-style configuration
# file: the database name and the host handed to `impala-shell -i`.
# Any other line in the file is ignored.
with open('/etc/duxbay.conf') as conf:
    for line in conf:
        if "DBNAME=" in line:
            DBNAME = line.split("=")[1].strip('\n').replace("'", "")
        elif "IMPALA_DEM=" in line:
            IMPALA_DEM = line.split("=")[1].strip('\n').replace("'", "")
# The notebook runs from a per-day directory: the last path component is used
# as the date string, and the data directory is the same path with its 'ipynb'
# component replaced by 'data'.
path = os.getcwd().split("/")
date = path[-1]
dpath = '/'.join('data' if part == 'ipynb' else part for part in path) + '/'

# Scored proxy records (input) and the file of saved threat summaries (output).
sconnect = dpath + 'proxy_scores.csv'
threat_f = dpath + "threats.csv"

# URI currently under investigation and its MD5 hex digest (used in file names).
anchor = ''
anchor_hash = ''

# Tallies and request records collected while reading a search result.
reqmethods = defaultdict(int)
rescontype = defaultdict(int)
referers = defaultdict(int)
refered = defaultdict(int)
requests = []

# Maximum number of result rows rendered in the HTML table.
top_results = 20
Interface
In [ ]:
# --- Expanded search: title, selector and the boxes that hold them ---
susp_title = widgets.HTML(value='<h4>Expanded search</h4>')
susp_select = widgets.Select(width=300)
susp_box = widgets.HBox(width=500, height=150)
susp_h_box = widgets.Box(width=500, height=200)

# --- Search results: a title plus the HTML table, stacked in one box ---
result_title = widgets.HTML()
result_html = widgets.HTML(width=500)
result_box = widgets.Box(width=500)
result_box.children = [result_title, result_html]

# --- Threat summary form containers ---
tc_box_main = widgets.Box(width=500, height=200)
threat_container = widgets.HBox(width=500, height=150)
tc_box_separator = widgets.Box(width=500, height=15)

# Year / month / day slices of the date string; used as the y, m and d
# partition values in the Impala queries.
yy, mm, dd = date[0:4], date[4:6], date[6:8]

# URIs flagged as high risk that still need a threat summary.
uri_sev = []
def start_investigation():
    """Show the high-risk URIs that do not yet have a saved threat summary.

    Clears the widget area, reads the hashes already recorded in the threats
    file, then scans the scores file for rows whose column 22 equals '1'.
    Each distinct full URI (column 18) whose MD5 hash is not already recorded
    is collected in the module-level ``uri_sev`` list. If any are found they
    are handed, sorted, to ``display_controls``; otherwise a notice is shown.
    """
    display(Javascript("$('.widget-area > .widget-subarea > *').remove();"))
    clear_output()

    # Hashes of threats that were already commented. A set keeps the per-row
    # membership test below O(1).
    commented = set()
    if os.path.isfile(threat_f) and not file_is_empty(threat_f):
        with open(threat_f, 'r') as th:
            t_read = csv.reader(th, delimiter='|')
            next(t_read, None)  # skip the header row
            for row in t_read:
                # csv.reader yields [] for blank lines; ignore those.
                if row and row[0] != '':
                    commented.add(row[0])

    # uri_sev is module-level state: empty it so a re-run does not keep URIs
    # that have been commented since the previous run.
    del uri_sev[:]
    seen = set()
    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header row (tolerates an empty file)
        for row in reader:
            # "p_date":0 , "p_time":1, "clientip":2 , "host":3, "reqmethod":4 , "useragent":5 , "resconttype":6
            # , "duration":7, "username":8 , "webcat":9, "referer":10, "respcode":11, "uriport":12, "uripath":13
            # , "uriquery":14, "serverip":15, "scbytes":16 , "csbytes":17, "fulluri":18, "word":19
            # NOTE(review): column 22 is not in the legend above; presumably
            # it is the severity flag, with '1' meaning high risk - confirm.
            if len(row) <= 22 or row[22] != '1':
                continue
            uri = row[18]
            if uri in seen:
                continue
            seen.add(uri)
            # The threats file identifies a URI by the MD5 hex digest of it.
            if md5.new(str(uri)).hexdigest() not in commented:
                uri_sev.append(uri)

    if not uri_sev:
        result_html = widgets.HTML(value="There are not high risk results.", width=500)
        result_box = widgets.Box(width=500, height=150)
        result_box.children = [result_html]
        display(result_box)
    else:
        # Sort on the whole URI. The previous key, itemgetter(0), compared
        # only the first character and raised IndexError on an empty URI.
        display_controls(sorted(uri_sev))
def display_controls(threat_list):
    """Render the 'Expanded search' selector and wire up its Search button.

    threat_list -- non-empty list of suspicious URIs offered in the selector;
                   the first entry is pre-selected.

    Clicking Search runs ``search_ip`` for the selected URI.
    """
    susp_title = widgets.HTML(value='<h4>Expanded search</h4>')
    search_btn = widgets.Button(description='Search')

    susp_box.children = [susp_select, search_btn]
    susp_h_box.children = [susp_title, susp_box]
    susp_select.options = threat_list
    susp_select.height = 150
    susp_select.selected_label = threat_list[0]
    display(susp_h_box)

    def html_cell(value):
        """Return value wrapped in <td>...</td> with HTML metacharacters escaped."""
        escaped = (value.replace('&', '&amp;').replace('<', '&lt;')
                   .replace('>', '&gt;').replace('"', '&quot;'))
        return '<td>' + escaped + '</td>'

    def search_ip(b):
        """Button callback: fetch and display every request involving the selected URI.

        Sets the module-level ``anchor``, ``anchor_hash`` and ``ir_f``. If the
        result file for this URI is missing or empty, it is produced with
        impala-shell. The file is then read to refill the module-level tallies
        (``reqmethods``, ``rescontype``, ``referers``, ``refered``,
        ``requests``) and to build the HTML result table.

        Raises OSError if impala-shell cannot be executed.
        """
        global anchor, anchor_hash, ir_f
        import subprocess

        anchor = susp_select.value
        anchor_hash = md5.new(str(anchor)).hexdigest()
        removeWidget(2)
        removeWidget(1)
        clear_output()
        ir_f = dpath + 'es-' + anchor_hash + ".csv"

        if not os.path.isfile(ir_f) or file_is_empty(ir_f):
            # time:0, clientip:1, username:2, duration:3, fullURI:4, webcat:5, respcode:6, reqmethod:7
            # useragent:8, resconttype: 9, referer: 10, uriport:11, serverip:12, scbytes:13, csbytes:14
            # The URI comes from proxy logs, so escape it for use inside a
            # single-quoted SQL string literal.
            sql_anchor = anchor.replace("\\", "\\\\").replace("'", "\\'")
            imp_query = (
                "SELECT p_time, clientip, username, duration, fulluri, webcat, respcode, reqmethod, "
                "useragent, resconttype, referer, uriport, serverip, scbytes, csbytes "
                "FROM {0}.proxy "
                "WHERE y={1} AND m={2} AND d={3} AND (fulluri='{4}' OR referer ='{4}') "
                "ORDER BY p_time"
            ).format(DBNAME, yy, mm, dd, sql_anchor)
            # An argument list bypasses the shell, so characters in the URI
            # such as $, backticks or quotes cannot be interpreted by it.
            # The delimiter is passed as backslash-t, exactly as the shell
            # delivered the single-quoted '\t' before.
            subprocess.call(['impala-shell', '-i', IMPALA_DEM, '--print_header', '-B',
                             '--output_delimiter=\\t', '-q', imp_query, '-o', ir_f])
            clear_output()

        # These containers are module-level and shared with
        # incident_progression(). Empty them so data from a previous search
        # does not leak into this URI's results.
        reqmethods.clear()
        rescontype.clear()
        referers.clear()
        refered.clear()
        del requests[:]
        seen_requests = set()

        height = 80
        table = ["<table border=1><th>TIME</th><th>CLIENT IP</th><th>USERNAME</th><th>DURATION</th>"
                 "<th>FULL URI</th><th>WEB CATEGORY</th><th>RESPONSE CODE</th><th>REQUEST METHOD</th>"
                 "<th>USER AGENT</th><th>MIME TYPE</th><th>REFERER</th><th>URI PORT</th><th>PROXY IP</th>"
                 "<th>SERVER BYTES</th><th>CLIENT BYTES</th>"]

        # The file may still be missing if the query failed; show an empty
        # table in that case instead of raising.
        if os.path.isfile(ir_f):
            with open(ir_f, 'r') as f:
                reader = csv.reader(f, delimiter='\t')
                next(reader, None)  # skip headers (tolerates an empty file)
                for i, row in enumerate(reader):
                    if len(row) < 15:
                        continue  # blank or truncated line
                    reqmethods[row[7]] += 1
                    rescontype[row[9]] += 1
                    if row[10] != anchor:
                        # Source URI's that refered the user to the threat
                        referers[row[10]] += 1
                        request_key = (row[1], row[10], row[7], row[9])
                        if request_key not in seen_requests:
                            seen_requests.add(request_key)
                            requests.append({'clientip': row[1], 'referer': row[10],
                                             'reqmethod': row[7], 'resconttype': row[9]})
                        if i < top_results:
                            table.append('<tr>' + ''.join(html_cell(v) for v in row[:15]) + '</tr>')
                    else:
                        # Destination URI's refered by the threat
                        refered[row[4]] += 1
                    height += 20

        table.append("</table>")
        result_html.value = ''.join(table)
        result_title.value = '<h4>Displaying top {0} search results</h4>'.format(top_results)
        result_box.height = max(200, height)
        display_threat_box(anchor)
        display(result_box)

    search_btn.on_click(search_ip)
def display_threat_box(ip):
    """Show the threat-summary form (title, summary, Save) for the current URI.

    ip -- unused; kept so existing callers keep working. The heading and the
          save action both use the module-level ``anchor``, so they always
          refer to the same URI.

    Clicking Save runs ``save_threat_summary``.
    """
    # The URI comes from proxy logs: escape it before embedding it in HTML.
    heading = anchor.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    tc_div_label = widgets.HTML(value='<h4>Threat summary for ' + heading + '</h4>')
    tc_txt_title = widgets.Text(value='', width=300, placeholder='Threat Title')
    tc_txa_summary = widgets.Textarea(value='', width=300, height=100)
    tc_btn_save = widgets.Button(description='Save')
    threat_container.children = [tc_txa_summary, tc_btn_save]
    tc_box_main.children = [tc_div_label, tc_txt_title, tc_box_separator, threat_container]
    display(tc_box_main)

    def save_threat_summary(b):
        """Button callback: append the summary to the threats file and build the story files.

        When a URI is selected, a 'hash|title|summary' line is appended to
        ``threat_f``, the URI is removed from the selector in the browser, and
        the incident-progression and timeline files are generated. With no URI
        selected, only a notice is shown.
        """
        if anchor != '':
            # Write the header when the file is missing OR empty. The reader
            # in start_investigation() always skips the first line, so a
            # header-less file would lose its first saved threat.
            if not os.path.exists(threat_f) or file_is_empty(threat_f):
                with open(threat_f, 'w') as comment:
                    comment.write('hash|title|summary\n')

            # NOTE(review): a '|' typed into the title or summary is written
            # verbatim and will shift the columns of this line - confirm
            # whether consumers of threats.csv need it escaped.
            with open(threat_f, 'a') as comment:
                comment.write(anchor_hash + '|' + tc_txt_title.value + '|' +
                              tc_txa_summary.value.replace('\n', '\\n') + '\n')
            removeWidget(2)
            removeWidget(1)
            # json.dumps yields a correctly quoted JavaScript string literal,
            # so quotes in the URI cannot break out of the script.
            display(Javascript(
                "$('option').filter(function () {"
                " return $(this).attr('data-value') === " + json.dumps(anchor) + ";"
                " }).remove();"))
            clear_output()
            response = "Successfully saved"
            # Only build the story files for a real selection; with an empty
            # anchor this used to create 'incident-progression-.json'.
            incident_progression(anchor, anchor_hash)
            timeline(anchor, anchor_hash)
        else:
            response = "No data selected"

        save_html = widgets.HTML(value=response, width=500)
        save_box = widgets.Box(width=500, height=150)
        save_box.children = [save_html]
        if susp_select.options:
            susp_select.selected_label = susp_select.options[0]
        display(save_box)

    tc_btn_save.on_click(save_threat_summary)
def incident_progression(anchor, anchor_hash):
    """Write the incident-progression JSON file for a threat unless it already exists.

    anchor      -- full URI of the threat; stored under 'fulluri'.
    anchor_hash -- hash string used in the output file name.

    The file also records the module-level ``requests`` list and the keys of
    the module-level ``referers`` tally (under 'referer_for'). An existing
    file is left untouched. The confirmation message is printed either way.
    """
    file_name = dpath + 'incident-progression-' + anchor_hash + '.json'
    if not os.path.exists(file_name):
        # list(...) gives the same keys as .keys() on Python 2 and stays
        # JSON-serialisable on Python 3, where .keys() is a view object.
        payload = {'fulluri': anchor, 'requests': requests, 'referer_for': list(referers)}
        # Serialise before opening, so a serialisation error cannot leave a
        # truncated file that would block regeneration.
        jsonstring = json.dumps(payload)
        with open(file_name, 'w') as f:
            f.write(jsonstring)
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Incident progression successfuly created")
def timeline(anchor, anchor_hash):
    """Create the per-client timeline TSV for a threat URI via impala-shell.

    anchor      -- full URI of the threat; an empty string means nothing is
                   selected and only a failure message is printed.
    anchor_hash -- hash string used in the output file name.

    If the timeline file already exists and is non-empty, nothing is done.
    Otherwise the query is run and a success or failure message is printed
    according to the impala-shell exit status.
    """
    import subprocess

    if anchor == "":
        print("Timeline couldn't be created")
        return

    sbdet_f = dpath + "timeline-" + anchor_hash + ".tsv"
    if os.path.isfile(sbdet_f) and not file_is_empty(sbdet_f):
        return  # already extracted on a previous run

    # The URI comes from proxy logs, so escape it for use inside a
    # single-quoted SQL string literal.
    sql_anchor = anchor.replace("\\", "\\\\").replace("'", "\\'")
    imp_query = (
        "SELECT concat(cast(p_date as string), ' ', cast(MIN(p_time) as string)) AS tstart, "
        "concat(cast(p_date as string), ' ', cast(MAX(p_time) as string)) AS tend, "
        "SUM(duration) AS duration, clientip, respcode "
        "from {0}.proxy WHERE fulluri='{1}' "
        "AND y={2} AND m={3} AND d={4} "
        "GROUP BY clientip, respcode, p_date ORDER BY clientip"
    ).format(DBNAME, sql_anchor, yy, mm, dd)

    # An argument list bypasses the shell, so characters in the URI such as $,
    # backticks or quotes cannot be interpreted by it. The delimiter is passed
    # as backslash-t, exactly as the shell delivered the single-quoted '\t'.
    command = ['impala-shell', '-i', IMPALA_DEM, '--print_header', '-B',
               '--output_delimiter=\\t', '-q', imp_query, '-o', sbdet_f]
    try:
        status = subprocess.call(command)
    except OSError:
        status = None  # impala-shell could not be started
    clear_output()

    if status == 0:
        print("Timeline successfully created")
    else:
        print("Timeline couldn't be created")
def file_is_empty(path):
    """Return True when the file at ``path`` is zero bytes long.

    Raises OSError if the file cannot be stat'ed (for example, it is missing).
    """
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes == 0
def removeWidget(index):
    """Remove one widget box from the notebook's widget area in the browser.

    index -- position handed to the jQuery ``:eq()`` selector to pick which
             ``.widget-box`` under the widget sub-area is removed.
    """
    selector = ".widget-area > .widget-subarea > .widget-box:eq({0})".format(index)
    display(Javascript("$('" + selector + "').remove();"))
In [ ]:
# Entry point of the notebook: build the initial view listing the high-risk
# URIs that still need a threat summary.
start_investigation()