The helper functions below insert Office document prediction results into a local Elasticsearch instance. They were tested with Elasticsearch / Kibana 5.1.2.
In [8]:
import mmbot as mmb
from elasticsearch import Elasticsearch
import time
import requests
import json
def wipeESCreateMappings(myindex, doc_type):
    '''
    Helper function to destroy all data in myindex if it exists and recreate field mappings

    myindex  -- name of the Elasticsearch index to drop and recreate
    doc_type -- document type the field mappings are registered under

    Talks to the Elasticsearch node at localhost:9200. Prints the response
    of the put_mapping call and returns nothing.
    '''
    # Imported here so the helper stays self-contained within the notebook cell.
    from elasticsearch.exceptions import NotFoundError

    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

    try:
        es.indices.delete(index=myindex, ignore=400)
    except NotFoundError:
        # A missing index is the only failure that is safe to shrug off;
        # connection and other errors are allowed to propagate.
        print("Mapping does not exist")

    # NOTE(review): "string" / "not_analyzed" is the pre-5.x spelling of
    # text / keyword fields; it was tested against ES 5.1.2 -- confirm it is
    # still accepted before pointing this at a newer cluster.
    newmapping = {
        doc_type: {
            "properties": {
                # Exact-match fields (not tokenised)
                "prediction": {"type": "string", "index": "not_analyzed"},
                "md5": {"type": "string", "index": "not_analyzed"},
                "featureprint": {"type": "string", "index": "not_analyzed"},
                "scan_date": {"type": "date", "format": "YYYY-MM-dd"},
                # Free-text fields
                "extracted_vba": {"type": "string"},
                "function_names": {"type": "string"},
                "sample_tag": {"type": "string"},
                # Numeric VBA features
                "vba_avg_param_per_func": {"type": "float"},
                "vba_cnt_comment_loc_ratio": {"type": "float"},
                "vba_cnt_comments": {"type": "long"},
                "vba_cnt_func_loc_ratio": {"type": "float"},
                "vba_cnt_functions": {"type": "long"},
                "vba_cnt_loc": {"type": "long"},
                "vba_entropy_chars": {"type": "float"},
                "vba_entropy_func_names": {"type": "float"},
                "vba_entropy_words": {"type": "float"},
                "vba_mean_loc_per_func": {"type": "float"},
            }
        }
    }

    # ignore=400 lets both calls pass when the index / mapping already exists.
    es.indices.create(index=myindex, ignore=400)
    put_mapping_result = es.indices.put_mapping(
        index=myindex, doc_type=doc_type, ignore=400, body=newmapping)
    print(put_mapping_result)
def insertIntoES(doc, myindex, doc_type):
    '''
    Inserts the dictionary provided into the specified index and doc_type

    doc      -- dictionary to index; it must contain an 'md5' key, whose
                value is used as the Elasticsearch document id
    myindex  -- name of the target index
    doc_type -- document type to index under

    Talks to the Elasticsearch node at localhost:9200. Raises KeyError if
    doc has no 'md5' entry.
    '''
    # The previous unused GET against http://localhost:9200/ was dropped:
    # its response was never read and it cost one extra round trip per document.
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    es.index(index=myindex, doc_type=doc_type, id=doc['md5'], body=json.dumps(doc))
def insertResultIntoES(prediction_result, sample_tag=None,
                       myindex='triage', doc_type='triage_sample'):
    '''
    Takes a prediction dataframe (from calling mm_predict) and parses out specific fields
    then inserts into ES for visualization

    prediction_result -- dataframe with one row per sample; the columns read
                         are md5, prediction, featureprint, extracted_vba and
                         the vba_* features listed below
    sample_tag        -- optional label stored on every document as 'sample_tag'
    myindex           -- target index (defaults to 'triage')
    doc_type          -- target document type (defaults to 'triage_sample')

    Prints the number of documents inserted; returns nothing.
    '''
    # Columns copied into the document unchanged.
    passthrough_fields = (
        'md5',
        'prediction',
        'featureprint',
        'vba_cnt_comments',
        'vba_cnt_functions',
        'vba_cnt_loc',
        'vba_entropy_chars',
        'vba_entropy_func_names',
        'vba_entropy_words',
        'vba_mean_loc_per_func',
    )

    # Every document in one run shares the same scan date.
    scan_date = time.strftime('%Y-%m-%d')

    inserted = 0
    for _, row in prediction_result.iterrows():
        doc = {field: row[field] for field in passthrough_fields}
        # Only the first 5000 characters of the macro source are stored.
        doc['extracted_vba'] = row['extracted_vba'][0:5000].encode("utf-8")
        doc['scan_date'] = scan_date
        if sample_tag is not None:
            doc['sample_tag'] = sample_tag

        insertIntoES(doc, myindex, doc_type)
        inserted += 1

    # Report the real count: the old code printed the last loop index, which
    # was one short and raised NameError on an empty dataframe.
    print("Completed inserting {} documents".format(inserted))
In [12]:
%%time
# Triage demo: reset the Elasticsearch index, score a folder of documents
# with MaliciousMacroBot, then load the predictions into Elasticsearch.
# (%%time is an IPython cell magic and must stay on the first line.)

# WARNING: this deletes the existing 'triage' index before recreating its mappings.
wipeESCreateMappings('triage', 'triage_sample')
# Directory passed to mmb_predict below as a file path.
files_to_triage = '/home/test/vt_all/triage_demo/'
mymacrobot = mmb.MaliciousMacroBot()
# NOTE(review): modelRebuild=False presumably reuses a previously built model
# instead of retraining -- confirm against the mmbot documentation.
mymacrobot.mmb_init_model(modelRebuild=False)
result = mymacrobot.mmb_predict(files_to_triage, datatype='filepath')
# Every inserted document is tagged with sample_tag "triage".
insertResultIntoES(result, sample_tag="triage")