Save for Storyboard


In [1]:
import struct, socket
import numpy as np 
import csv, json 
import os 
import urllib2 
import datetime
import operator
import itertools

try:
    import ipywidgets as widgets # For jupyter/ipython >= 1.4
except ImportError:
    from IPython.html import widgets
from IPython.display import display, HTML, clear_output, Javascript 

# Read the database name and the Impala daemon host from the Duxbay config file.
with open('/etc/duxbay.conf') as conf:
    for line in conf:
        # Split on the first '=' only, so a value that itself contains '=' is kept whole.
        if "DBNAME=" in line:
            DBNAME = line.split("=", 1)[1].strip('\n').replace("'", "")
        elif "IMPALA_DEM=" in line:
            IMPALA_DEM = line.split("=", 1)[1].strip('\n').replace("'", "")

# The last component of the working directory is used as the date being analysed
# (it is later sliced into year/month/day).
path = os.getcwd().split("/")
t_date = path[-1]
# Data files live in the same tree as the notebook, with 'ipynb' replaced by 'data'.
dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'
sconnect = dpath + 'dns_scores.csv'   # scored DNS records
threat_f = dpath + "threats.csv"      # threat summaries saved from this notebook
anchor = ''          # IP or DNS query name currently under investigation
anchor_type = ''     # 'i' when anchor is an IP, 'q' when it is a query name
top_results = 20     # number of search-result rows to display


:0: FutureWarning: IPython widgets are experimental and may change in the future.
---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-1-dade28cb694b> in <module>()
     14 from IPython.display import display, HTML, clear_output, Javascript
     15 
---> 16 with open('/etc/duxbay.conf') as conf:
     17     for line in conf.readlines():
     18         if "DBNAME=" in line: DBNAME = line.split("=")[1].strip('\n').replace("'","");

IOError: [Errno 2] No such file or directory: '/etc/duxbay.conf'

Interface


In [ ]:
# ---- Expanded search: selector and the boxes that hold it ----
susp_box = widgets.HBox(width=500, height=150)
susp_h_box = widgets.Box(width=500, height=200)
susp_title = widgets.HTML(value='<h4>Expanded search</h4>')
susp_select = widgets.Select(width=300)

# ---- Search results: a title and an HTML table stacked in one box ----
result_title = widgets.HTML()
result_html = widgets.HTML(width=500)
result_box = widgets.Box(width=500)
result_box.children = [result_title, result_html]

# ---- Threat summary: containers filled in by display_threat_box ----
tc_box_main = widgets.Box(width=500, height=200)
threat_container = widgets.HBox(width=500, height=150)
tc_box_separator = widgets.Box(width=500, height=15)

# Year / month / day pieces of the date string (assumes t_date is formatted yyyymmdd).
yy, mm, dd = t_date[:4], t_date[4:6], t_date[6:8]

# Per-IP and per-query lookups populated by start_investigation.
ip_sev = dict()
dns_sev = dict()

def start_investigation():
    """Load the scored DNS records and show the 'Expanded search' controls.

    Reads the threats file (when it exists and is non-empty) to learn which IPs
    and query names already have a saved summary, then scans the scores file for
    rows flagged with ip_sev == '1' or dns_sev == '1' that have not been saved yet.
    Those candidates are handed, sorted by name, to display_controls; when there
    are none a short message is displayed instead.

    Side effects: clears the cell's widgets/output and fills the module-level
    ip_sev / dns_sev dicts with the 'score' column of the first row seen for each
    IP / query name.
    """
    ips_query = {}
    # Sets give constant-time "already reviewed?" checks for every scored row.
    c_ips = set()
    c_dns = set()

    # Drop whatever widgets and output a previous run left in this cell.
    display(Javascript("$('.widget-area > .widget-subarea > *').remove();"))
    clear_output()

    if os.path.isfile(threat_f) and not file_is_empty(threat_f):
        with open(threat_f, 'r') as th:
            t_read = csv.reader(th, delimiter='|')
            next(t_read, None)  # skip the header line
            for row in t_read:
                if not row:
                    continue  # csv yields [] for blank lines
                if row[0] != '': c_ips.add(row[0])
                if row[1] != '': c_dns.add(row[1])

    with open(sconnect, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        # The default keeps a header-less (empty) file from raising StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                continue  # csv yields [] for blank lines
            # frame_time, frame_len, ip_dst, dns_qry_name, dns_qry_class,  dns_qry_type, dns_qry_rcode, domain, subdomain,
            #      0    ,      1   ,    2  ,       3     ,          4   ,         5    ,        6     ,   7   ,    8     ,
            #subdomain_length, num_periods, subdomain_entropy, top_domain, word, score, query_rep,  hh,
            #      9          ,      10    ,     11          ,    12     ,  13 ,  14  ,   15     ,  16,
            #ip_sev, dns_sev, dns_qry_class_name, dns_qry_type_name, dns_qry_rcode_name, network_context, unix_tstamp
            # 17   ,   18   ,           19      ,      20          ,         21        ,      22        ,     23
            if row[2] not in ips_query and row[2] not in c_ips and row[17] == '1':
                ips_query[row[2]] = 'i'
            if row[3] not in ips_query and row[3] not in c_dns and row[18] == '1':
                ips_query[row[3]] = 'q'

            # Only the first occurrence of each IP / query name is recorded.
            if row[2] not in ip_sev:
                ip_sev[row[2]] = row[14]
            if row[3] not in dns_sev:
                dns_sev[row[3]] = row[14]

    if len(ips_query) == 0:
        # Distinct local names so the module-level result widgets are not shadowed.
        empty_html = widgets.HTML(value="There are not high risk results.", width=500)
        empty_box = widgets.Box(width=500, height=150)
        empty_box.children = [empty_html]
        display(empty_box)
    else:
        # (name, type) pairs ordered by name; they become the selector's options.
        sorted_dict = sorted(ips_query.items(), key=operator.itemgetter(0))
        display_controls(sorted_dict)

        
def display_controls(ip_list): 
    
    susp_title =  widgets.HTML(value='<h4>Expanded search</h4>')
    search_btn = widgets.Button(description='Search')
    susp_box.children = [susp_select,search_btn]
    susp_h_box.children = [susp_title,susp_box] 

    susp_select.options = ip_list
    susp_select.height=150
    susp_select.selected_label = ip_list[0][0]
    
    display(susp_h_box)
  
    def search_ip(b):  
        global anchor 
        global anchor_type
        anchor = ''
        anchor_type = ''
        anchor = susp_select.selected_label  
        anchor_type = susp_select.value 
        removeWidget(2)
        removeWidget(1) 
        clear_output()
        global ir_f
        ir_f = dpath + 'threat-dendro-' + anchor + ".csv" 
            
        table = "<table border=1><th>IP</th><th>QUERY</th><th>TOTAL</th>"

        if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):
            if anchor_type == 'i':
                imp_query = ("\" SELECT COUNT(dns_qry_name) as total, dns_qry_name, ip_dst, 0 as sev FROM "+DBNAME+".dns " +
                       " WHERE y="+ yy +" AND m="+ mm +" AND d="+ dd +" AND ip_dst='"+ anchor +"' GROUP BY dns_qry_name, ip_dst" +
                       " ORDER BY total DESC LIMIT 10000\" ") 
            elif anchor_type == 'q':
                imp_query = ("\" SELECT COUNT(ip_dst) as total, dns_qry_name, ip_dst, 0 as sev FROM "+DBNAME+".dns " + 
                       " WHERE y="+ yy +" AND m="+ mm +" AND d="+ dd +" AND dns_qry_name='"+ anchor +"'" +
                       " GROUP BY ip_dst, dns_qry_name ORDER BY total DESC LIMIT 10000\"")   
        
            !impala-shell -i $IMPALA_DEM --print_header -B --output_delimiter=',' -q $imp_query -o $ir_f

        height=80
        clear_output() 
#       total, dns_qry_name, ip_dst, sev
        with open(ir_f, 'r') as f:
            reader = itertools.islice(csv.reader(f, delimiter=','), top_results) 
            if reader!= '':
                reader.next()
                for row in reader:  
                    table += "<tr><td>" +  row[2] + "</td><td>" + row[1] + "</td><td align='center'>" + str(row[0]) + "</td></tr>"  
                    height += 20
                    
            table += "</table>"            
                    
        result_html.value=table
        result_title.value='<h4>Displaying top {0} search results</h4>'.format(top_results)
        result_box.height=max(200, height)
        display_threat_box(anchor)
        display(result_box)
    
    search_btn.on_click(search_ip)

        
def display_threat_box(ip):
    """Show the 'Threat summary' form (title, summary text, Save button).

    ip -- accepted for the caller's convenience; the form itself reads the
          module-level `anchor` / `anchor_type`, both when it is built and when
          Save is clicked.
    """
    tc_div_label = widgets.HTML(value='<h4>Threat summary for ' + anchor +'</h4>')

    tc_txt_title = widgets.Text(value='',width=300, placeholder='Threat Title')
    tc_txa_summary = widgets.Textarea(value='', width=300, height=100)
    tc_btn_save = widgets.Button(description='Save')
    threat_container.children = [tc_txa_summary,tc_btn_save]
    tc_box_main.children = [tc_div_label, tc_txt_title, tc_box_separator, threat_container]

    display(tc_box_main)

    def save_threat_summary(b):
        """Button callback: append the current anchor's title/summary to the threats file."""
        # anchor, anchor_type and threat_f are only read here, so no `global` is needed.
        anchor_ip = ''
        anchor_dns = ''
        if anchor != '':
            if anchor_type == 'i':
                anchor_ip = anchor
            elif anchor_type == 'q':
                anchor_dns = anchor

            # A zero-byte file needs the header just like a missing one; otherwise
            # the first saved row would sit on the header line and be skipped by
            # readers that discard line one.
            needs_header = not os.path.exists(threat_f) or file_is_empty(threat_f)
            with open(threat_f, 'a') as comment:
                if needs_header:
                    comment.write('ip_dst|dns_qry_name|title|summary\n')
                # Newlines in the summary are stored as a literal "\n" so that
                # each threat stays on a single line.
                comment.write(anchor_ip + '|' + anchor_dns + '|' + tc_txt_title.value + '|' +
                                  tc_txa_summary.value.replace('\n', '\\n') + '\n')

            removeWidget(2)
            removeWidget(1)
            # Remove the saved anchor from the rendered selector in the browser.
            display(Javascript("$(\"option[data-value='" + anchor  +"']\").remove();"))
            response = "Successfully saved"
        else:
            response = "No data selected"

        save_html = widgets.HTML(value=response, width=500)
        save_box  = widgets.Box(width=500, height=150)
        save_box.children = [save_html]
        susp_select.selected_label = susp_select.options[0][0]
        display(save_box)

    tc_btn_save.on_click(save_threat_summary)
    
     
def file_is_empty(path):
    """Return True when the file at `path` is zero bytes long, False otherwise."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes == 0

def removeWidget(index):
    """Remove the widget box at zero-based position `index` from the cell's widget area.

    The removal happens in the browser: a jQuery snippet is emitted as Javascript output.
    """
    display(Javascript(
        "$('.widget-area > .widget-subarea > .widget-box:eq({0})').remove();".format(index)
    ))

In [ ]:
# Entry point: clear the cell, load the scored data and display the search controls.
start_investigation()