In [5]:
import struct, socket
import numpy as np
import linecache, bisect
import csv
import operator
import json
import os
try:
    import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
    from IPython.html import widgets
from IPython.display import display, Javascript, clear_output

# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------
# Read the database name and the Impala daemon address from the duxbay config.
# For a matching line, the text between the first and second '=' is kept, with
# the trailing newline and any single quotes removed.
with open('/etc/duxbay.conf') as conf:
    for line in conf:
        if "DBNAME=" in line:
            DBNAME = line.split("=")[1].strip('\n').replace("'", "")
        elif "IMPALA_DEM=" in line:
            IMPALA_DEM = line.split("=")[1].strip('\n').replace("'", "")

# The working directory drives every other path: its last component is used as
# the date (the Impala queries slice it as year / month / day), and the
# 'ipynb' path component is swapped for 'data' or 'context'.
spath = os.getcwd()
path = spath.split("/")
date = path[-1]
dpath = '/'.join(['data' if part == 'ipynb' else part for part in path]) + '/'
# The context directory sits two levels above the per-date folder.
cpath = '/'.join(['context' if part == 'ipynb' else part for part in path][:len(path) - 2]) + '/'

# Input / output files used by the functions below.
sconnect = dpath + 'flow_scores.csv'
threats_file = dpath + 'threats.csv'
iploc = cpath + 'iploc.csv'
nwloc = cpath + 'networkcontext_1.csv'

# Mutable notebook state shared (via `global`) by the widget callbacks.
anchor_ip = ''
ir_f = ''
threat_name = ''
iplist = ''
top_inbound_b = ''
top_results = 20

# First column of iploc.csv as unsigned 32-bit integers; it is searched with
# bisect when geolocation data is attached to the connections.
if not os.path.isfile(iploc):
    print("No iploc.csv file was found, Map View map won't be created")
else:
    iplist = np.loadtxt(iploc, dtype=np.uint32, delimiter=',', usecols={0},
                        converters={0: lambda s: np.uint32(s.replace('"', ''))})


:0: FutureWarning: IPython widgets are experimental and may change in the future.

Function Definitions


In [6]:
def display_controls(ip_list):
    container = widgets.HBox(width=550, height=150)    
    h_container = widgets.Box(width=550, height=200)    
    label = widgets.HTML(value='<h2>Expanded Search</h2>')
    ip_select = widgets.Select(options=ip_list,height=min(len(ip_list)*18+18,150),width=200)
    search_button = widgets.Button(description='Search')
    container.children = [ip_select,search_button]
    h_container.children = [label,container] 
    display(h_container)
    
    def search_ip(b):
        global anchor_ip 
        global top_inbound_b
        anchor_ip = ip_select.value   
        if anchor_ip != "":
            clear_output()        
            removeWidget(1)
            print "Searching for ip: " + anchor_ip
            global ir_f 
            ir_f = dpath + "ir-" + anchor_ip + ".tsv"

            if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):  
                imp_query = (" \"SELECT min(treceived) as firstSeen, max(treceived) as lastSeen, sip as srcIP, dip as dstIP, " + 
                "sport as SPort, dport AS Dport, count(sip) as conns, max(ipkt) as maxPkts, avg(ipkt) " + 
                "as avgPkts, max(ibyt) as maxBytes, avg(ibyt) as avgBytes FROM "+DBNAME+".flow WHERE " + 
                "y="+ date[0:4] +" AND m="+ date[4:6] +" AND d="+ date[6:] +" " + 
                " AND (sip =\'" + anchor_ip + "\'  OR dip=\'" + anchor_ip + "\') GROUP BY sip, dip,sport,dport\" ")
                !impala-shell -i $IMPALA_DEM --print_header -B --output_delimiter='\t' -q $imp_query -o $ir_f
            clear_output()
            print "\n Looking for additional details..."
            get_in_out_and_twoway_conns()
            add_geospatial_info()
            add_network_context() 
            print anchor_ip + ": connected to " + str(len(inbound.keys())) + " IPs"
            top_inbound_b = get_top_bytes(inbound,top_results)
            top_inbound_conns = get_top_conns(inbound,top_results)
            top_inbound_b.update(top_inbound_conns) # merge the two dictionaries
            display_threat_box(anchor_ip)
            
    search_button.on_click(search_ip)


def display_threat_box(ip):
    """Show the threat-summary form and build the story board on "Save".

    ip -- accepted for the caller's convenience; the form and the generated
          files are keyed on the global ``anchor_ip`` (the only caller passes
          that same value).

    Saving writes the attack map, stats, dendrogram and timeline files for
    ``anchor_ip`` and records the title/summary in the threats file.
    """
    container_summary = widgets.HBox(width=550, height=150)
    separator = widgets.Box(width=550, height=15)
    main_container = widgets.Box(width=550, height=200)

    threat_header = widgets.HTML(value='<h4>Threat summary for ' + anchor_ip +'</h4>')
    threat_title_box = widgets.Text(value='', width=200, placeholder='Threat Title')
    threat_summary_box = widgets.Textarea(value='', width=300, height=100)
    save_button = widgets.Button(description='Save')
    container_summary.children = [threat_summary_box, save_button]
    main_container.children = [threat_header, threat_title_box, separator, container_summary]
    display(main_container)

    def save_threat_summary(b):
        """Button callback: generate every story-board artefact."""
        global threat_name

        clear_output()
        removeWidget(1)

        # threat_name was declared global here but never assigned, so the
        # stats file always got an empty root name. Use the title the analyst
        # typed into the form.
        threat_name = threat_title_box.value

        print("Creating Story Board elements ...")
        generate_attack_map_file(anchor_ip, top_inbound_b, outbound, twoway)
        generate_stats(anchor_ip, top_inbound_b, outbound, twoway, threat_name)
        generate_dendro(anchor_ip, top_inbound_b, outbound, twoway, date)
        details_inbound(anchor_ip, top_inbound_b)
        # Newlines are stored as a literal "\n" so each threat stays on one line.
        add_threat(anchor_ip, threat_title_box.value, threat_summary_box.value.replace('\n', '\\n'))
        print("Story board successfully created for {0}".format(anchor_ip))

    save_button.on_click(save_threat_summary)
    
        
def details_inbound(ip, inbound):
    if ip != "" and len(inbound) > 0:
        if os.path.isfile(ir_f):
            sbdet_f = dpath + "sbdet-" + ip + ".tsv"
            if not os.path.isfile(sbdet_f) or (os.path.isfile(sbdet_f) and file_is_empty(sbdet_f)):  
                imp_query = "SELECT min(treceived) as tstart, max(treceived) as tend, sip as srcIP, "
                + "dip as dstIP, proto as Proto, sport as SPort, dport AS Dport,ipkt as "
                + "Pkts, ibyt as Bytes FROM "+DBNAME+".flow WHERE "
                + "y="+ date[0:4] +" AND m="+ date[4:6] +" AND d="+ date[6:]
                + " AND ((dip IN({0}) "
                + "AND sip ='{1}') OR "
                + "(sip IN({0}) "
                + "AND dip ='{1}')) GROUP BY sip, dip, proto, sport, dport, ipkt, ibyt SORT BY tstart"

                ips = "'" + "','".join(inbound.keys()) + "'"
                imp_query = imp_query.format(ips,ip)
                !impala-shell -i $IMPALA_DEM --print_header -B --output_delimiter='\t' -q $imp_query -o $sbdet_f

            print "Timeline successfully created"
    else:
          print "Timeline couldn't be created"  


        
def _dendro_branch(name, conns):
    """Build one dendrogram branch: one child per network context with its IP count."""
    counts = {}
    for peer in conns:
        details = conns[peer]
        # Peers without network-context data ('nwloc' missing or empty) are left out.
        if 'nwloc' in details and len(details['nwloc']) > 0:
            ctx = details['nwloc'][2]  # third nwloc field: the context description
            counts[ctx] = counts.get(ctx, 0) + 1
    return {
        'name': name,
        'children': [{'name': ctx, 'impact': counts[ctx]} for ctx in counts],
        'impact': 0
    }


def generate_dendro(ip, inbound, outbound, twoway, date):
    """Write ``threat-dendro-<anchor_ip>.json`` (the incident progression tree).

    ip       -- name of the root node.
    inbound, outbound, twoway -- dicts keyed by peer IP whose values may carry
                an 'nwloc' entry; each dict becomes one branch of the tree.
    date     -- stored unchanged under the root's 'time' key.

    The output file name is built from the globals ``dpath`` and ``anchor_ip``.
    """
    dendro_fpath = dpath + 'threat-dendro-' + anchor_ip + ".json"

    obj = {
        'name': ip,
        'children': [
            _dendro_branch('Inbound Only', inbound),
            _dendro_branch('Outbound Only', outbound),
            _dendro_branch('two way', twoway),
        ],
        'time': date
    }

    with open(dendro_fpath, 'w') as dendro_f:
        dendro_f.write(json.dumps(obj))
    print('Incident progression successfully created')

    
def _stats_branch(name, conns):
    """Build one stats branch: one child per context label with its IP count."""
    counts = {}
    for peer in conns:
        details = conns[peer]
        full_ctx = ''
        if 'nwloc' in details and len(details['nwloc']) > 0:
            # Keep only the part of the description before the first '.'.
            full_ctx = details['nwloc'][2].split('.')[0]
        # Every peer is counted; get_ctx_name decides the label even for ''.
        ctx = get_ctx_name(full_ctx)
        counts[ctx] = counts.get(ctx, 0) + 1
    return {
        'name': name,
        'children': [{'name': ctx, 'size': counts[ctx]} for ctx in counts],
        'size': len(conns)
    }


def generate_stats(ip, inbound, outbound, twoway, threat_name):
    """Write ``stats-<anchor_ip>.json`` summarising connections per context.

    ip          -- unused; the file name is built from the global ``anchor_ip``.
    inbound, outbound, twoway -- dicts keyed by peer IP whose values may carry
                   an 'nwloc' entry; each dict becomes one branch.
    threat_name -- name of the root node.
    """
    stats_fpath = dpath + 'stats-' + anchor_ip + ".json"

    obj = {
        'name': threat_name,
        'children': [
            _stats_branch('Inbound Only', inbound),
            _stats_branch('Outbound Only', outbound),
            _stats_branch('two way', twoway),
        ],
        'size': len(inbound) + len(outbound) + len(twoway)
    }

    json_str = json.dumps(obj)
    with open(stats_fpath, 'w') as stats_f:
        stats_f.write(json_str)
    print('Stats file successfully created')
    
def get_ctx_name(full_context):
    """Reduce a context description to one of the labels VPN, DMZ, Proxy or FW.

    The labels are tried in that order and the first one contained in
    ``full_context`` wins; 'DMZ' is returned when none of them occurs.
    """
    for label in ("VPN", "DMZ", "Proxy", "FW"):
        if label in full_context:
            return label
    return 'DMZ'


def display_expanded_search():
    """List the high-risk IPs that still need a threat summary.

    Collects the source and destination IPs of every scored flow whose first
    column is '1', leaves out the IPs already present in the threats file, and
    hands the remaining ones (in first-seen order) to ``display_controls``.
    Prints a message instead when nothing is left.
    """
    clear_output()

    # IPs that already have a saved summary. add_threat() writes this file
    # with '|' as the separator, so it must be read back the same way.
    c_ips = set()
    if os.path.isfile(threats_file) and not file_is_empty(threats_file):
        with open(threats_file, 'r') as th:
            t_read = csv.reader(th, delimiter='|')
            next(t_read, None)  # skip the header, tolerate a header-less file
            for row in t_read:
                if row and row[0] != '':
                    c_ips.add(row[0])

    external_ips = []
    seen = set()  # constant-time duplicate check; the list keeps the order
    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header
        #Internal Netflows use case:
        for row in reader:
            #sev,tstart,srcIP,dstIP,sport,dport,proto,flag,ipkt,ibyt,lda_score,rank,srcIpInternal,destIpInternal,srcGeo,dstGeo,
            # 0 ,  1   ,  2  ,  3  ,  4  ,  5  ,  6  ,  7 ,  8 , 9  ,     10  , 11 ,      12     ,       13     ,  14  ,  15  ,
            #srcDomain,dstDomain,srcIP_rep,dstIP_rep
            #    16   ,   17    ,   18    ,   19
            if len(row) > 3 and row[0] == '1':
                for candidate in (row[2], row[3]):
                    if candidate not in seen and candidate not in c_ips:
                        seen.add(candidate)
                        external_ips.append(candidate)

    if len(external_ips) > 0:
        display_controls(external_ips)
    else:
        print("There are not high risk connections.")

        
# calculate number of inbound only, two-way, and outbound only
# build dict of IP addresses
# firstSeen,lastSeen,srcIP, dstIP, sport,dport,conns, maxPkts, avgPkts,maxBytes, avgBytes
def get_in_out_and_twoway_conns():
    """Classify the IPs found in the expanded-search result file (``ir_f``).

    Rebuilds the globals ``inbound``, ``outbound`` and ``twoway``:
      * twoway   -- IPs seen both as a source and as a destination,
      * outbound -- IPs seen only as a source,
      * inbound  -- IPs seen only as a destination.

    Each entry keeps the integer form of the IP, the peer from the last row
    read for that IP, and that row's connection count and max bytes.
    A missing, empty or header-only file leaves all three dicts empty.
    """
    global inbound, outbound, twoway
    inbound = {}
    outbound = {}
    twoway = {}
    srcdict = {}
    dstdict = {}

    # Bound before the file check: it used to be created only inside the
    # branch, so a missing file raised NameError at the test below.
    rowct = 0
    if os.path.isfile(ir_f):
        with open(ir_f, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            next(reader, None)  # skip headers; an empty file is not an error
            for row in reader:
                if row != []:
                    # columns: firstSeen,lastSeen,srcIP,dstIP,sport,dport,conns,
                    #          maxPkts,avgPkts,maxBytes,avgBytes
                    src_ip = row[2]
                    dst_ip = row[3]
                    src_int = struct.unpack("!L", socket.inet_aton(src_ip))[0]
                    dst_int = struct.unpack("!L", socket.inet_aton(dst_ip))[0]
                    srcdict[src_ip] = {
                        'ip_int': src_int,
                        'dst_ip': dst_ip,
                        'dst_ip_int': dst_int,
                        'conns': int(row[6]),
                        'maxbytes': int(row[9])
                    }
                    dstdict[dst_ip] = {
                        'ip_int': dst_int,
                        'src_ip': src_ip,
                        'src_ip_int': src_int,
                        'conns': int(row[6]),
                        'maxbytes': int(row[9])
                    }
                    rowct += 1

    if rowct > 0:
        for result in srcdict:
            if result in dstdict:
                twoway[result] = srcdict[result]
            else:
                outbound[result] = srcdict[result]

        for result in dstdict:
            if result not in srcdict:
                inbound[result] = dstdict[result]
        print("Input, Output & Two way connections file detected.")
    else:
        print("Couldn't find any matching connections.")
          

              
            
#=========== Adds GEO IP information to the outbound, inbound and twoway connections==============================# 
def add_geospatial_info():
    """Attach iploc.csv rows to the outbound, twoway and inbound connections.

    For each connection the integer IP is located in ``iplist`` with bisect and
    the iploc line with that number is parsed as CSV and stored under:
      * outbound: 'geo' (the IP itself) and 'geo_dst' (its destination),
      * twoway:   'geo',
      * inbound:  'geo' (the IP itself) and 'geo_src' (its source).

    Does nothing when no iploc data was loaded.
    """
    # iplist is '' until iploc.csv has been loaded into an array. Comparing an
    # array with '' is ambiguous (NumPy may answer elementwise), so test the
    # type / length instead.
    if isinstance(iplist, str) or len(iplist) == 0:
        return

    def geo_row(ip_int):
        """Return the parsed iploc line selected by bisecting iplist."""
        line_no = bisect.bisect(iplist, ip_int)
        line = linecache.getline(iploc, line_no).replace('\n', '')
        # An IP below the first range selects line 0, which linecache returns
        # as ''; that parses to an empty row.
        return next(csv.reader([line]), [])

    for srcip in outbound:
        outbound[srcip]['geo'] = geo_row(outbound[srcip]['ip_int'])
        outbound[srcip]['geo_dst'] = geo_row(outbound[srcip]['dst_ip_int'])

    for dstip in twoway:
        twoway[dstip]['geo'] = geo_row(twoway[dstip]['ip_int'])

    for srcip in inbound:
        inbound[srcip]['geo'] = geo_row(inbound[srcip]['ip_int'])
        inbound[srcip]['geo_src'] = geo_row(inbound[srcip]['src_ip_int'])
     
             
              
# need some way to combine timelines of outbound and two-way with big picture inbound only
# get network context - get start and end ranges
def add_network_context():
    """Tag every outbound, twoway and inbound IP with its network context.

    Reads the network-context CSV (``nwloc``: address spec, description) and
    stores ``[first_ip_int, last_ip_int, description]`` under the 'nwloc' key
    of each connection, or '' when no entry covers the IP. An address spec may
    be a subnet ("a.b.c.d/nn"), a range ("a.b.c.d - e.f.g.h") or a single IP.

    Does nothing when the context file is missing.
    """
    if not os.path.isfile(nwloc):
        return

    def ip_to_int(address):
        """Dotted-quad string -> unsigned 32-bit integer."""
        return struct.unpack("!L", socket.inet_aton(address))[0]

    nwdict = {}
    with open(nwloc, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header
        #address range, description
        for row in reader:
            if len(row) < 2:
                continue  # blank or incomplete line
            spec, description = row[0], row[1]
            if '/' in spec:
                #Range in subnet
                iprange = spec.split('/')
                first = ip_to_int(iprange[0])
                # The range starts at the address as written (it is not
                # masked down to the network address).
                last = first + 2**(32 - int(iprange[1])) - 1
            elif '-' in spec:
                #IP Range
                iprange = spec.split('-')
                first = ip_to_int(iprange[0].replace(" ", ""))
                last = ip_to_int(iprange[1].replace(" ", ""))
            else:
                #Exact match
                first = last = ip_to_int(spec)
            nwdict[spec] = [first, last, description]

    def context_for(ip):
        """Exact entry if the IP is listed verbatim, else the first covering range."""
        if ip in nwdict:
            return nwdict[ip]
        ip_int = ip_to_int(ip)
        for spec in nwdict:
            if nwdict[spec][0] <= ip_int <= nwdict[spec][1]:
                return nwdict[spec]
        return ''

    # The outbound exact-match case used to write into `inbound`, which raised
    # KeyError (or tagged the wrong dict); every dict now updates itself.
    for conns in (outbound, twoway, inbound):
        for ip in conns:
            conns[ip]['nwloc'] = context_for(ip)

   

def _globe_feature(location, ip, conn_type, geo):
    """Build one map point; coordinates are [geo[7], geo[6]] as floats."""
    return {
        'type': 'Feature',
        'properties': {
            'location': location,
            'ip': ip,
            'type': conn_type
        },
        'geometry': {
            'type': 'Point',
            'coordinates': [float(geo[7]), float(geo[6])]
        }
    }


def generate_attack_map_file(ip, inbound, outbound, twoway):
    """Write ``globe-<anchor_ip>.json`` with the map points of every connection.

    ip -- unused; the file name is built from the global ``anchor_ip``.
    inbound, outbound, twoway -- dicts keyed by IP whose values carry the
          parsed iploc rows ('geo', plus 'geo_dst' / 'geo_src') and the peer
          IP ('dst_ip' / 'src_ip').

    Point types: 1 = two way, 2 = inbound, 3 = outbound. Connections whose geo
    row is missing, too short or non-numeric are skipped. Nothing is written
    when no iploc data was loaded.
    """
    # iplist is '' until iploc.csv has been loaded into an array; comparing an
    # array with '' is ambiguous, so test the type / length instead.
    if isinstance(iplist, str) or len(iplist) == 0:
        print("The map can't be created without an iploc file")
        return

    globe_fpath = dpath + 'globe-' + anchor_ip + ".json"
    globe_json = {}
    globe_json['type'] = "FeatureCollection"
    globe_json['sourceips'] = []
    globe_json['destips'] = []
    sources = globe_json['sourceips']
    dests = globe_json['destips']

    # Best effort: an IP outside every iploc range yields an empty row
    # (IndexError) and a malformed coordinate yields ValueError; neither should
    # abort the whole map.
    skippable = (ValueError, IndexError, KeyError)

    for peer in twoway:
        try:
            geo = twoway[peer]['geo']
            dests.append(_globe_feature(geo[8], peer, 1, geo))
        except skippable:
            pass

    for peer in outbound:
        try:
            geo = outbound[peer]['geo']
            geo_dst = outbound[peer]['geo_dst']
            sources.append(_globe_feature(geo[8], peer, 3, geo))
            # NOTE(review): the destination point carries the source's
            # location label (geo[8]), as before - confirm this is intended.
            dests.append(_globe_feature(geo[8], outbound[peer]['dst_ip'], 3, geo_dst))
        except skippable:
            pass

    for peer in inbound:
        try:
            geo = inbound[peer]['geo']
            geo_src = inbound[peer]['geo_src']
            sources.append(_globe_feature(geo[8], peer, 2, geo))
            # NOTE(review): same location-label question as for outbound.
            dests.append(_globe_feature(geo[8], inbound[peer]['src_ip'], 2, geo_src))
        except skippable:
            pass

    json_str = json.dumps(globe_json)
    with open(globe_fpath, 'w') as globe_f:
        globe_f.write(json_str)
    print("Geolocation map successfully created")
  
      
def add_threat(ip,threat_title, threat_comment):
    """Record a threat in the threats file as an ``ip|title|summary`` line.

    The file is created with an ``ip|title|summary`` header when it does not
    exist (or exists but is empty). An entry identical to one already in the
    file is not added again.
    """
    header = 'ip|title|summary\n'
    entry = '{0}|{1}|{2}\n'.format(ip, threat_title, threat_comment)

    try:
        with open(threats_file, 'r') as threat_f:
            content = threat_f.read()
    except IOError:
        # No readable file yet: start a fresh one.
        content = ''

    if content == '':
        # Readers skip the first line as a header, so one must always exist.
        content = header
    elif not content.endswith('\n'):
        content += '\n'

    if entry not in content:
        content += entry

    with open(threats_file, 'w') as threat_fw:
        threat_fw.write(content)
    
def _top_connections(conns_dict, top, field):
    """Return the ``top`` entries of conns_dict with the largest ``field``, printing each."""
    # items() + an index-based key instead of iteritems() and a
    # tuple-parameter lambda: same result, and not tied to Python 2 syntax.
    ranked = sorted(conns_dict.items(), key=lambda item: item[1][field], reverse=True)
    ranked = ranked[0:top]
    for peer, details in ranked:
        print("{0} | {1}".format(peer, details[field]))
    return dict(ranked)


def get_top_bytes(conns_dict, top):
    """Return (and print) the ``top`` connections with the highest 'maxbytes'.

    conns_dict -- dict keyed by IP whose values carry a 'maxbytes' entry.
    top        -- maximum number of entries to keep.
    """
    print("Now looking at the top "+ str(top) +" connections per bytes:")
    return _top_connections(conns_dict, top, 'maxbytes')


def get_top_conns(conns_dict, top):
    """Return (and print) the ``top`` connections with the highest 'conns'.

    conns_dict -- dict keyed by IP whose values carry a 'conns' entry.
    top        -- maximum number of entries to keep.
    """
    print("Now looking at the top "+str(top)+" connections per number of connections:")
    return _top_connections(conns_dict, top, 'conns')

def file_is_empty(path):
    """Return True when the file at ``path`` has a size of zero bytes."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes == 0

def removeWidget(index):
    """Remove a widget box from the page by running a jQuery snippet.

    index -- position passed to the ``:eq()`` selector.
    """
    script = Javascript(
        "$('.widget-area > .widget-subarea > .widget-box:eq({0})').remove();".format(index)
    )
    display(script)

In [7]:
# Start the workflow: show the search controls for the high-risk IPs that do
# not have a threat summary yet (or print a message when there are none).
display_expanded_search()


There are not high risk connections.