Initialize workflow.


In [ ]:
import struct, socket
import shutil
import numpy as np
import pandas as pd
import linecache, bisect
import csv
import operator
import os, time, subprocess 
try:
    import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
    from IPython.html import widgets

from IPython.display import display, Javascript, clear_output

# The notebook derives everything from its working directory, whose last two
# components are read as <data source>/<date>.
path = os.getcwd().split("/")
date = path[-1]
dsource = path[-2:][0]  # second-to-last component (the only one if there is just one)

# Data and context roots mirror the notebook path with the 'ipynb' component
# replaced; the context root additionally drops the last two components.
dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'
cpath = '/'.join(['context' if var == 'ipynb' else var for var in path][:-2]) + '/'

# Scores file, its backup, the feedback file, and two scratch copies.
sconnect = '{0}flow_scores.csv'.format(dpath)
sconnectbu = '{0}flow_scores_bu.csv'.format(dpath)
score_fbk = '{0}flow_scores_fb.csv'.format(dpath)
tmpconnect = '{0}.tmp'.format(sconnect)
stemp = '{0}.new'.format(sconnect)

# Maximum number of score rows scanned when building the selection lists.
coff = 250
nwloc = '{0}networkcontext.csv'.format(cpath)

# Module-level lookup tables (filled by attack_heuristics).
srcdict = {}
srclist = []
dstdict = {}
dstlist = []
sportdict = {}
sportlist = []
dportdict = {}
dportlist = []

In [ ]:
def apply_css_to_select(select):
    """Attach the shared CSS rules to a selection widget.

    Sets ``select._css`` to a tuple of (selector, property, value) triples:
    two rules with selector ``None`` (height and width of 90%) followed by
    three rules for the 'select' selector.
    """
    own_rules = [(None, prop, '90%') for prop in ('height', 'width')]
    inner_rules = [
        ('select', 'overflow-x', 'auto'),
        ('select', 'width', '100%'),
        ('select', 'margin', 0),
    ]
    select._css = tuple(own_rules + inner_rules)

# Load the LDA scores and update them with ratings: either a single edge, or ALL rows that contain IP x and port y.
def displaythis():
    """Build and display the connection-scoring form.

    Steps:
      1. Clear any widgets already rendered in the output area.
      2. Re-apply the built-in rules (set_rules), which rewrites the scores file.
      3. Read up to ``coff`` rows of the scores file and collect the distinct
         source IPs, destination IPs, source ports and destination ports of
         rows whose severity is still '0' (unscored).
      4. Show four selection lists, a quick-search text box, a rating selector
         and the 'Score' / 'Save' buttons.

    'Score' (update_sconnects) writes the chosen rating into every matching row
    of the scores file and appends those rows to the feedback file.
    'Save' (savesort) sorts the scores file, rebuilds the form and calls
    ml_feedback().
    """
    # Scores-file columns, by index:
    #   0 sev, 1 tstart, 2 srcIP, 3 dstIP, 4 sport, 5 dport, 6 proto, 7 flag,
    #   8 ipkt, 9 ibyt, 10 lda_score, 11 rank, ...
    header = ["sev", "tstart", "srcIP", "dstIP", "sport", "dport", "proto", "flag", "ipkt", "ibyt",
              "lda_score", "rank", "srcIpInternal", "destIpInternal", "srcGeo", "dstGeo", "srcDomain",
              "dstDomain", "srcIP_rep", "dstIP_rep"]
    # Row columns compared against the four filters (srcIP, dstIP, sport, dport).
    fhash = [2, 3, 4, 5]
    unselected = '- Select -'

    display(Javascript("$('.widget-area > .widget-subarea > *').remove();"))

    # These are local to the form; they intentionally shadow the module-level
    # tables of the same names.
    srcdict, srclist = {}, [unselected]
    dstdict, dstlist = {}, [unselected]
    sportdict, sportlist = {}, [unselected]
    dportdict, dportlist = {}, [unselected]

    set_rules()
    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header; tolerate an empty file
        rowct = 1
        for row in reader:
            if row[0] == '0':  # only unscored connections are offered
                if row[2] not in srcdict:
                    srclist.append(row[2])
                    srcdict[row[2]] = struct.unpack("!L", socket.inet_aton(row[2]))[0]
                if row[3] not in dstdict:
                    dstlist.append(row[3])
                    dstdict[row[3]] = struct.unpack("!L", socket.inet_aton(row[3]))[0]
                if row[4] not in sportdict:
                    sportlist.append(row[4])
                    sportdict[row[4]] = row[4]
                if row[5] not in dportdict:
                    dportlist.append(row[5])
                    dportdict[row[5]] = row[5]
            if rowct == coff:
                break
            rowct += 1

    # Source IP box
    srcIpLabel = widgets.HTML(value="Source IP:", height='10%', width='100%')
    srcselect = widgets.Select(options=srclist, height='90%')
    apply_css_to_select(srcselect)
    srcIpBox = widgets.Box(width='25%', height='100%')
    srcIpBox.children = (srcIpLabel, srcselect)

    # Destination IP box
    dstIpLabel = widgets.HTML(value="Dest IP:", height='10%', width='100%')
    dstselect = widgets.Select(options=dstlist, height='90%')
    apply_css_to_select(dstselect)
    dstIpBox = widgets.Box(width='25%', height='100%')
    dstIpBox.children = (dstIpLabel, dstselect)

    # Source Port box
    srcPortLabel = widgets.HTML(value="Src Port:", height='10%', width='100%')
    sportselect = widgets.Select(options=sportlist, height='90%')
    apply_css_to_select(sportselect)
    srcPortBox = widgets.Box(width='20%', height='100%')
    srcPortBox.children = (srcPortLabel, sportselect)

    # Destination Port box
    dstPortLabel = widgets.HTML(value="Dst Port:", height='10%', width='100%')
    dportselect = widgets.Select(options=dportlist, height='90%')
    apply_css_to_select(dportselect)
    dstPortBox = widgets.Box(width='20%', height='100%')
    dstPortBox.children = (dstPortLabel, dportselect)

    # Quick Search and Actions Box
    emptyLabel = widgets.HTML(value=" ")
    srctext = widgets.Text(value='', width='100%', placeholder='Quick IP scoring')
    srctext._css = (
        (None, 'width', '100%'),
    )
    ratingbut = widgets.RadioButtons(description='Rating:', options=['1', '2', '3'], width='100%')
    assignbut = widgets.Button(description='Score', width='45%')
    assignbut.button_style = 'primary'
    updatebut = widgets.Button(description='Save', width='45%')
    updatebut.button_style = 'primary'
    updatebut._css = (
        (None, 'margin-left', '10%'),
    )
    actionsBox = widgets.Box(width='20%', height='100%')
    actionsBox.children = (emptyLabel, srctext, ratingbut, assignbut, updatebut)

    # Container Box
    bigBox = widgets.HBox(width='90%', height=250)
    bigBox.children = (srcIpBox, dstIpBox, srcPortBox, dstPortBox, actionsBox)

    display(bigBox)

    def _selection_mask(vals):
        """Return (cleaned values, bitmask): bit k is set when filter k is in use."""
        cleaned, mask = [], 0
        for k, val in enumerate(vals):
            if val is None or val == unselected:
                val = ''
            if val != '':
                mask += 2**k
            cleaned.append(val)
        return cleaned, mask

    def _row_matches(row, vals, mask):
        """True when at least one filter is in use and the row equals every used filter."""
        if mask == 0:
            # Without this guard an empty filter set would match every row.
            return False
        for n, val in enumerate(vals):
            if (2**n & mask) > 0 and row[fhash[n]] != val:
                return False
        return True

    def update_sconnects(b):
        """'Score' handler: rate every row matching the current selection.

        When the quick-search box holds an IP, that IP is tried both as the
        source address and as the destination address; otherwise only the four
        selection lists are used.
        """
        clear_output()
        time.sleep(.25)
        scored_threats = []
        if srctext.value != '':
            svals = [srctext.value, dstselect.value, sportselect.value, dportselect.value]
            dvals = [srcselect.value, srctext.value, sportselect.value, dportselect.value]
        else:
            svals = [srcselect.value, dstselect.value, sportselect.value, dportselect.value]
            dvals = []
        risk = ratingbut.value
        svals, shash = _selection_mask(svals)
        dvals, dhash = _selection_mask(dvals)

        rowct = 0
        if shash > 0 or dhash > 0:
            with open(tmpconnect, 'w') as g:
                riter = csv.writer(g, delimiter=',')
                riter.writerow(header)
                with open(sconnect, 'r') as f:
                    reader = csv.reader(f, delimiter=',')
                    next(reader, None)
                    for row in reader:
                        # A row that satisfies both the source-side and the
                        # destination-side filter is scored and counted once.
                        if _row_matches(row, svals, shash) or _row_matches(row, dvals, dhash):
                            row[0] = risk
                            scored_threats.append(row)
                            rowct += 1
                        riter.writerow(row)

            # Feedback file is tab-separated; the header is written only when
            # the file does not exist yet.
            write_header = not os.path.exists(score_fbk)
            with open(score_fbk, 'a') as feedback:
                wr = csv.writer(feedback, delimiter='\t', quoting=csv.QUOTE_NONE)
                if write_header:
                    wr.writerow(header)
                wr.writerows(scored_threats)

            shutil.copyfile(tmpconnect, sconnect)

        # Single-argument print(...) behaves the same as the print statement.
        print("{0} matching connections scored".format(rowct))

    def savesort(b):
        """'Save' handler: sort the scores file by (sev, rank) and rebuild the form."""
        # Clear last output
        clear_output()
        # Read and sort first, so a malformed row cannot leave a truncated
        # scratch file behind, and so the input handle is always closed.
        with open(sconnect, 'r') as f:
            reader = csv.reader(f, delimiter=",")
            next(reader, None)
            srtlist = sorted(reader, key=lambda x: (int(x[0]), float(x[11])))
        with open(stemp, 'w') as g:
            riter = csv.writer(g, delimiter=',')
            riter.writerow(header)
            riter.writerows(srtlist)

        shutil.copyfile(stemp, sconnect)
        print("Suspicious connects successfully updated")
        # NOTE(review): reloadParentData is presumably defined by the page
        # hosting this notebook - confirm.
        display(Javascript('reloadParentData();'))
        # Close widgets form
        bigBox.close()
        # Rebuild widgets form
        displaythis()
        ml_feedback()

    assignbut.on_click(update_sconnects)
    updatebut.on_click(savesort)

    
def set_rules():
    """Apply the built-in heuristic rules, marking each matching connection with risk 2.

    Every rule is a pair (rops, rvals) of six entries, one per field that
    apply_rules checks, in the order srcIP, dstIP, sport, dport, ipkt, ibyt
    (columns 2, 3, 4, 5, 8, 9 of the scores file).  An empty string in rvals
    leaves that field unconstrained; 'leq' means "<=" and 'eq' means "==".

    Rules are applied one after another, each rewriting the scores file.
    """
    risk = 2
    rules = [
        # dport <= 1024 and ibyt <= 54
        (['leq', 'leq', 'leq', 'leq', 'leq', 'leq'], ['', '', '', 1024, '', 54]),
        # dport <= 1024, ipkt == 3 and ibyt == 152
        (['leq', 'leq', 'leq', 'leq', 'eq', 'eq'], ['', '', '', 1024, 3, 152]),
        # dport <= 1024, ipkt == 2 and ibyt == 104
        # (this rule used to be defined but never applied)
        (['leq', 'leq', 'leq', 'leq', 'eq', 'eq'], ['', '', '', 1024, 2, 104]),
        # sport == 0 and dport <= 1023
        (['leq', 'leq', 'eq', 'leq', 'leq', 'leq'], ['', '', 0, 1023, '', '']),
    ]
    for rops, rvals in rules:
        apply_rules(rops, rvals, risk)

    
def apply_rules(rops,rvals,risk):
    """Rewrite the scores file, setting column 0 to ``risk`` on rows matching a rule.

    rops  -- per-field operator names; 'leq' (<=) and 'eq' (==) are recognised,
             any other name can never match.
    rvals -- per-field thresholds; '' means the field is not part of the rule.
    risk  -- value stored in column 0 of every matching row.

    Field position n is read from row column [2, 3, 4, 5, 8, 9][n] and compared
    as an integer.  A row matches only when every constrained field passes.
    The result is written to ``tmpconnect`` and then copied over ``sconnect``.
    """
    # NOTE(review): column 0 is overwritten whatever it held before, so a row
    # that was already rated is re-rated when it matches - confirm intended.
    columns = [2,3,4,5,8,9]
    comparators = {'leq': operator.le, 'eq': operator.eq}

    # Bit n of `wanted` is set when field n is constrained by the rule.
    wanted = sum(1 << pos for pos, val in enumerate(rvals) if val != '')

    with open(tmpconnect,'w') as out_file:
        writer = csv.writer(out_file,delimiter=',')
        writer.writerow(["sev", "tstart", "srcIP", "dstIP", "sport", "dport", "proto", "flag", "ipkt", "ibyt",
                         "lda_score", "rank", "srcIpInternal", "destIpInternal", "srcGeo", "dstGeo", "srcDomain",
                         "dstDomain", "srcIP_rep", "dstIP_rep"])

        with open(sconnect, 'r') as in_file:
            rows = csv.reader(in_file,delimiter=',')
            next(rows)  # skip the header line
            for row in rows:
                matched = 0
                for pos in range(6):
                    bit = 1 << pos
                    if not (wanted & bit):
                        continue
                    compare = comparators.get(rops[pos])
                    if compare is not None and compare(int(row[columns[pos]]), int(rvals[pos])):
                        matched += bit
                if matched == wanted:
                    row[0] = risk
                writer.writerow(row)
    shutil.copyfile(tmpconnect,sconnect)

    
def attack_heuristics():
    """Print every known source IP that appears in more than 20 rows of the scores file.

    First records each srcIP, dstIP, sport and dport value of the scores file
    in the module-level srcdict / dstdict / sportdict / dportdict tables
    (existing entries are kept).  Then groups the file by 'srcIP' and prints
    "<ip> connects: <count>" for each key of srcdict whose group has more than
    20 rows.  A key of srcdict with no rows in the file is reported with
    "Key Error for ip: <ip>".
    """
    with open(sconnect, 'rb') as f:
        reader = csv.reader(f,delimiter=',')
        next(reader, None)  # skip the header; tolerate an empty file
        for row in reader:
            srcdict.setdefault(row[2], row[2])
            dstdict.setdefault(row[3], row[3])
            sportdict.setdefault(row[4], row[4])
            dportdict.setdefault(row[5], row[5])

    df = pd.read_csv(sconnect)
    gb = df.groupby([u'srcIP'])

    for srcip in srcdict:
        # Only a missing group is expected here; any other error should
        # surface instead of being reported as a key error.
        try:
            connects = len(gb.get_group(srcip))
        except KeyError:
            print("Key Error for ip: " + srcip)
            continue
        if connects > 20:
            # Single-argument print(...) behaves the same as the print statement.
            print("{0} connects: {1}".format(srcip, connects))
               
            
def ml_feedback():
    """Copy the feedback file to the ML node by running scp through the shell.

    The shell command sets DSOURCE and FDATE, sources /etc/duxbay.conf, derives
    the user, node and path from $LUSER, $MLNODE and $LPATH, and copies
    ``score_fbk`` to $lpath/<base name of sconnect>.  The exit status of the
    command is not inspected.
    """
    remote_name = os.path.basename(sconnect)
    steps = [
        "DSOURCE={0}",
        "FDATE={1}",
        "source /etc/duxbay.conf",
        "usr=$(echo $LUSER | cut -f3 -d'/')",
        "mlnode=$MLNODE",
        "lpath=$LPATH",
        "scp {2} $usr@$mlnode:$lpath/{3}",
    ]
    # Steps are chained with '&&' so the copy only runs if every earlier step succeeds.
    separator = " &&" + " " * 8
    command = separator.join(steps).format(dsource, date, score_fbk, remote_name)

    subprocess.call(command, shell=True)

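Display the scoring form. The commented-out cells that follow can be enabled to re-apply the rules, run the attack heuristics, or restore the scores file from its backup.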


In [ ]:
# Apply the built-in rules and show the interactive scoring form.
displaythis()

In [ ]:
# set_rules()

In [ ]:
# attack_heuristics()

In [ ]:
# !cp $sconnectbu $sconnect