Triage Demonstration Using Elasticsearch / Kibana

Some helper functions below needed to insert office document prediction results into a local Elasticsearch instance. This was tested with ES / Kibana 5.1.2


In [8]:
import mmbot as mmb
from elasticsearch import Elasticsearch
import time
import requests
import json

def wipeESCreateMappings(myindex, doc_type):
    '''
    Helper function to destroy all data in myindex if it exists and recreate field mappings
    '''
    es = Elasticsearch([{'host':'localhost', 'port':9200}])
    try:
        es.indices.delete(myindex, ignore=400)
    except:
        print "Mapping does not exist"
    es.indices.get_settings()
    newmapping = {
        doc_type: {
            "properties" : {
                "prediction":  {"type":"string", "index":"not_analyzed"}, 
                "md5":  {"type":"string", "index":"not_analyzed"},
                "featureprint":  {"type":"string", "index":"not_analyzed"},
                "scan_date": {"type": "date", "format":"YYYY-MM-dd"},
                "extracted_vba": {"type":"string"},
                "function_names": {"type":"string"},
                "sample_tag": {"type":"string"},
                "vba_avg_param_per_func": {"type": "float"},
                "vba_cnt_comment_loc_ratio": {"type": "float"},
                "vba_cnt_comments": {"type": "long"},
                "vba_cnt_func_loc_ratio": {"type": "float"},
                "vba_cnt_functions": {"type": "long"},
                "vba_cnt_loc": {"type": "long"},
                "vba_entropy_chars": {"type": "float"},
                "vba_entropy_func_names": {"type": "float"},
                "vba_entropy_words": {"type": "float"},
                "vba_mean_loc_per_func": {"type": "float"}
            }
        }
    }

    es = Elasticsearch([{'host':'localhost', 'port':9200}])
    es.indices.create(index=myindex, ignore=400)
    put_mapping_result = es.indices.put_mapping(index=myindex, doc_type=doc_type, ignore=400, body=newmapping)
    print put_mapping_result 
    es.indices.get_settings()

def insertIntoES(doc, myindex, doc_type):
    '''
    Inserts the dictionary provided into the specified index and doc_type
    '''
    res = requests.get('http://localhost:9200/')
    es = Elasticsearch([{'host':'localhost', 'port':9200}])
    es.index(index=myindex, doc_type=doc_type, id=doc['md5'], body=json.dumps(doc))

def insertResultIntoES(prediction_result, sample_tag=None):
    '''
    Takes a prediction dataframe (from calling mm_predict) and parses out specific fields
    then inserts into ES for visualization
    '''
    for i in range(len(prediction_result)):
        vba_clean = prediction_result.iloc[i].extracted_vba[0:5000].encode("utf-8")
        scan_date = time.strftime('%Y-%m-%d')
        doc = {'md5':prediction_result.iloc[i].md5,
            'prediction':prediction_result.iloc[i].prediction,
            'featureprint':prediction_result.iloc[i].featureprint,
            'vba_cnt_comments':prediction_result.iloc[i].vba_cnt_comments,
            'vba_cnt_functions':prediction_result.iloc[i].vba_cnt_functions,
            'vba_cnt_loc':prediction_result.iloc[i].vba_cnt_loc,
            'vba_entropy_chars':prediction_result.iloc[i].vba_entropy_chars,
            'vba_entropy_func_names':prediction_result.iloc[i].vba_entropy_func_names,
            'vba_entropy_words':prediction_result.iloc[i].vba_entropy_words,
            'vba_mean_loc_per_func':prediction_result.iloc[i].vba_mean_loc_per_func,
            'extracted_vba':vba_clean,
            'scan_date':scan_date
        }        
        
        if sample_tag is not None:
            doc['sample_tag'] = sample_tag
        insertIntoES(doc, 'triage', 'triage_sample')
    print "Completed inserting {} documents".format(i)

(Re)Create field mapping in ES, make predictions, and insert into ES


In [12]:
%%time
wipeESCreateMappings('triage', 'triage_sample')

files_to_triage = '/home/test/vt_all/triage_demo/'
mymacrobot = mmb.MaliciousMacroBot()
mymacrobot.mmb_init_model(modelRebuild=False)
result = mymacrobot.mmb_predict(files_to_triage, datatype='filepath')
insertResultIntoES(result, sample_tag="triage")


{u'acknowledged': True}
Completed inserting 20 documents
CPU times: user 3.71 s, sys: 1.29 s, total: 5 s
Wall time: 5.2 s