In [ ]:
import struct, socket
import csv, json 
import os 
import datetime
import operator
import itertools
import md5
from collections import defaultdict 

try:
    import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
    from IPython.html import widgets
from IPython.display import display, HTML, clear_output, Javascript 

with open('/etc/spot.conf') as conf:
    for line in conf.readlines():
        if "DBNAME=" in line: DBNAME = line.split("=")[1].strip('\n').replace("'","");      
        elif "IMPALA_DEM=" in line: IMPALA_DEM = line.split("=")[1].strip('\n').replace("'",""); 
            
path = os.getcwd().split("/") 
date = path[len(path)-1]   
dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'
sconnect = dpath + 'proxy_scores.tsv' 
threat_f = dpath + "threats.csv"
anchor = ''
anchor_hash = ''
clientips  = defaultdict(int)
reqmethods = defaultdict(int)
rescontype = defaultdict(int)
referers   = defaultdict(int)
refered    = defaultdict(int)
requests = []
top_results = 20
details_limit = 1000

In [ ]:
# Widget styles and initialization
topBox = widgets.Box()
bottomBox = widgets.Box()
mainBoxes_css = (
    (None, 'width', '90%'),
    (None, 'margin', '0 auto'),
)

topBox._css = mainBoxes_css
bottomBox._css = mainBoxes_css 

threatBox = widgets.HBox(width='100%', height='auto')
threat_title = widgets.HTML(height='25px', width='100%')
threat_list_container = widgets.Box(width='80%', height='100%')
threat_button_container = widgets.Box(width='20%', height='100%')
susp_select = widgets.Select(height='100%', width='99%')
search_btn = widgets.Button(description='Search',height='100%', width='65px')
search_btn.button_style = 'primary'
susp_select._css = (
    (None, 'height', '90%'),
    (None, 'width', '95%'),
    ('select', 'overflow-x', 'auto'),
    ('select', 'margin', 0)
)

resultSummaryBox = widgets.Box()
result_title = widgets.HTML(width='100%')
result_summary_box = widgets.HBox(width='100%')
result_summary_container = widgets.Box(width='80%')
result_button_container =  widgets.Box(width='20%')
result_summary_box.children = [result_title, result_summary_container, result_button_container]
 
resultTableBox = widgets.Box()
result_html_title = widgets.HTML(height='25px', width='100%')
result_html_box = widgets.Box() #this one has the scroll
result_html = widgets.HTML(width='100%')
result_box_css = (
    (None, 'overflow', 'hidden'),
    (None, 'width', '100%'),
)

resultSummaryBox._css = result_box_css
resultTableBox._css = result_box_css
 
result_html_box._css = (
    (None, 'overflow','auto'),
    (None, 'max-height', '300px'), 
)

threat_button_container._css = (
    (None, 'padding-top', '30px'), 
)  

topBox.children = [threatBox]
bottomBox.children = [resultSummaryBox,resultTableBox]

threat_list_container.children = [threat_title,susp_select]
threat_button_container.children = [search_btn]
threatBox.children = [threat_list_container, threat_button_container]

Interface


In [ ]:
yy = date[0:4]
mm = date[4:6] 
dd = date[6:8]


def fill_list(list_control,source):
    susp_select.options = list_control
    susp_select.selected_label = list_control[0]


def data_loader():
    clear_output() 
    c_uri = []
    uri_sev=[]

    #discards threats already commented
    if os.path.isfile(threat_f) and not file_is_empty(threat_f):
        with open(threat_f, 'r') as th:
            t_read = csv.reader(th, delimiter='|')
            t_read.next()
            for row in t_read: 
                if row[0] != '' : c_uri.append(row[0])
            
    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        reader.next()
        for row in reader:
        #   "p_date":0 , "p_time":1, "clientip":2 , "host":3, "reqmethod":4 , "useragent":5 , "resconttype":6
        # , "duration":7, "username":8 , "webcat":9, "referer":10, "respcode":11, "uriport":12, "uripath":13
        # , "uriquery":14, "serverip":15, "scbytes":16 , "csbytes":17, "fulluri":18, "word":19
            #Forms a hash out of the anchor to use it as the file name
            if row[22] == '1': 
                row_hash = md5.new(str(row[18])).hexdigest()
                if row[18] not in uri_sev and row_hash not in c_uri:
                    uri_sev.append(row[18])

    threat_title.value ="<h4>Suspicious URI</h4>"
                       
    if len(uri_sev) == 0:
        display(Javascript("$('.widget-area > .widget-subarea > *').remove();"))   
        display(widgets.HTML(value="There are not high risk results."),)
    else:  
        sorted_dict = sorted(uri_sev, key=operator.itemgetter(0))       
        fill_list(sorted_dict,susp_select)     
        
    
def start_investigation(): 
    display(Javascript("$('.widget-area > .widget-subarea > *').remove();"))    
    data_loader()
    if susp_select.options:
        display_controls()  

        
def display_controls():  
    display(topBox) 
  
    def search_ip(b):  
        global anchor  
        global anchor_hash
        global ir_f
        anchor='' 
        anchor_hash = ''
        anchor = susp_select.value   
        anchor_hash = md5.new(str(anchor)).hexdigest()
        removeWidget(3)
        removeWidget(2)
        removeWidget(1) 
        height=80        
        ir_f = dpath + 'es-' + anchor_hash + ".csv" 
        table = "<table><th>TIME</th><th>CLIENT IP</th><th>USERNAME</th><th>DURATION</th> \
        <th>FULL URI</th><th>WEB CATEGORY</th><th>RESPONSE CODE</th><th>REQUEST METHOD</th><th>USER AGENT</th> \
        <th>MIME TYPE</th><th>REFERER</th><th>URI PORT</th><th>PROXY IP</th><th>SERVER BYTES</th><th>CLIENT BYTES</th>"
        
        if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):
            # time:0, clientip:1, username:2, duration:3, fullURI:4, webcat:5, respcode:6, reqmethod:7
            # useragent:8, resconttype: 9, referer: 10, uriport:11, serverip:12, scbytes:13, csbytes:14
            imp_query = ("\"SELECT p_time, clientip, username, duration, fulluri, webcat, respcode, reqmethod,\
                 useragent, resconttype, referer, uriport, serverip, scbytes, csbytes FROM {0}.proxy\
                 WHERE y='{1}' AND m='{2}' AND d='{3}' AND (fulluri='{4}' OR referer ='{4}') ORDER BY p_time\"")   
            
            imp_query = imp_query.format(DBNAME,yy,mm,dd,anchor) 
            !impala-shell -i $IMPALA_DEM --quiet -q "INVALIDATE METADATA"
            !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter='\t' -q $imp_query -o $ir_f
           
        clear_output() 
        req_method = {}

        with open(ir_f, 'r') as f:
            #Creates default dictionaries
            global reqmethods
            global rescontype
            global referers
            global refered
            global requests
            global clientips
            clientips  = defaultdict(int)
            reqmethods = defaultdict(int)
            rescontype = defaultdict(int)
            referers   = defaultdict(int)
            refered    = defaultdict(int)
            try:
                reader = csv.reader(f, delimiter='\t')
                reader.next() # Skip headers
                i=0         
                for row in reader:
                    clientips[row[1]]+=1
                    reqmethods[row[7]]+=1
                    rescontype[row[9]]+=1
                    if row[10] != anchor:
                        #Source URI's that refered the user to the threat
                        referers[row[10]]+=1
                        if({'clientip':row[1],'referer':row[10],'reqmethod':row[7],'resconttype':row[9]}) not in requests:
                            requests.append({'clientip':row[1],'referer':row[10],'reqmethod':row[7],'resconttype':row[9]})
                        if i < top_results:
                            table += "<tr><td>"+row[0]+"</td><td>"+row[1]+"</td>\
                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>"+row[2]+"</div></td><td>"+row[3]+"</td>\
                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>"+row[4]+"</div></td><td>"+row[5]+"</td>\
                            <td>"+row[6]+"</td><td>"+row[7]+"</td>\
                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>"+row[8]+"</div></td>\
                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>"+row[9]+"</div></td>\
                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>"+row[10]+"</div></td>\
                            <td>"+row[11]+"</td><td>"+row[12]+"</td><td>"+row[13]+"</td><td>"+row[14]+"</td></tr>"
                    else:
                        #Destination URI's refered by the threat
                        refered[row[4]]+=1
                    height += 20
                    i+=1
                table += "</table>"                
                result_html_title.value='<h4>Displaying top {0} search results</h4>'.format(top_results)
            except:
                table = "<table></table>"
                result_html_title.value='<h4>No results were found.</h4>'
                
            result_html.value=table
            result_html_box.children = [result_html]
 
            display_threat_box(anchor)
            resultTableBox.children = [result_html_title, result_html_box]
            display(bottomBox)
    search_btn.on_click(search_ip)

        
def display_threat_box(ip):    
    result_title.value="<h4 class='spot-text-wrapper spot-text-xlg' data-toggle='tooltip'>Threat summary for " + anchor +"</h4>"
    tc_txt_title = widgets.Text(value='', placeholder='Threat Title', width='100%')
    tc_txa_summary = widgets.Textarea(value='', height=100, width='95%')
    tc_btn_save = widgets.Button(description='Save', width='65px', layout='width:100%')
    tc_btn_save.button_style = 'primary'
    
    tc_txt_title._css = (
        (None, 'width', '95%'),
    )
    
    result_summary_container.children = [tc_txt_title, tc_txa_summary]
    result_button_container.children=[tc_btn_save]
    result_summary_box.children = [result_summary_container, result_button_container]
    resultSummaryBox.children = [result_title,result_summary_box]
    
    def save_threat_summary(b):
        global anchor 
        global anchor_hash 
        if anchor != '':      
            global threat_f
            if not os.path.exists(threat_f):  
                with open(threat_f, 'w') as comment:
                    comment.write('hash|title|summary\n')
            
            with open(threat_f, 'a') as comment:
                comment.write(anchor_hash + '|' + tc_txt_title.value + '|' +
                                  tc_txa_summary.value.replace('\n', '\\n') + '\n') 
            
            display(Javascript("$(\"option[data-value='" + anchor +"']\").remove();"))   
            display(Javascript("$('.widget-area > .widget-subarea > .widget-box:gt(0)').remove();"))
            
            response = "Summary successfully saved"
            incident_progression(anchor, anchor_hash)
            timeline(anchor, anchor_hash)
        else:
            response = "No data selected" 
       
        susp_select.selected_label = susp_select.options[0]
        display(widgets.Box((widgets.HTML(value=response, width='100%'),)))
    
    tc_btn_save.on_click(save_threat_summary)
    
    
def incident_progression(anchor, anchor_hash):
    file_name = dpath + 'incident-progression-'+anchor_hash+'.json'
    jsonstring = json.dumps({'fulluri':anchor, 'requests':requests,'referer_for':referers.keys()})
    if not os.path.exists(file_name):
         with open(file_name, 'w') as f:
            f.write(jsonstring)        
    response = "Incident progression successfuly created"
    display(widgets.Box((widgets.HTML(value=response, width='100%'),)))
    
    
def timeline(anchor, anchor_hash): 
    response = ""
    susp_ips = []
    if clientips:
        srtlist = sorted(list(clientips.items()), key=lambda x: x[1], reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])  
            
    if anchor != "":
        sbdet_f = dpath + "timeline-"+anchor_hash+".tsv"
        if not os.path.isfile(sbdet_f) or (os.path.isfile(sbdet_f) and file_is_empty(sbdet_f)):  
            imp_query = "\"SELECT concat(cast(p_date as string), ' ', cast(MIN(p_time) as string)) AS tstart,\
            concat(cast(p_date as string), ' ', cast(MAX(p_time) as string)) AS tend, SUM(duration) AS duration,\
            clientip, respcode from {0}.proxy WHERE fulluri='{1}' AND clientip IN ({5}) \
            AND y='{2}' AND m='{3}' AND d='{4}' GROUP BY clientip, p_time, respcode, p_date ORDER BY clientip\
            LIMIT {6}\""

            imp_query=imp_query.format(DBNAME,anchor,yy,mm,dd,("'" + "','".join(susp_ips) + "'"), details_limit)  
            !impala-shell -i $IMPALA_DEM --quiet -q "INVALIDATE METADATA"
            !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter='\t' -q $imp_query -o $sbdet_f
            clear_output()
            
            response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"
    
    display(widgets.Box((widgets.HTML(value=response, width='100%'),)))
    data_loader()
    
def file_is_empty(path):
    return os.stat(path).st_size==0

def removeWidget(index):
    js_command = "$('.widget-area > .widget-subarea > .widget-box:eq({0})').remove();".format(index)    
    display(Javascript(js_command))

In [ ]:
start_investigation()