Graphistry brings modern visual analytics to event data in Splunk. The full platform is intended for enterprise teams, while this tutorial shares visibility techniques for researchers and hunters.
To use:
Further reading:
In [0]:
import os

#graphistry
# Uncomment and fill in the entries below to target a specific Graphistry server.
GRAPHISTRY = {
#    'server': 'MY.graphistry.com',
#    'protocol': 'https',
#    'key': 'MY_GRAPHISTRY_KEY',
#    'api': 2
}

#splunk
# Each setting is taken from an environment variable when that variable is
# set, so real credentials never have to be typed into (and saved with) the
# notebook. The second argument of every lookup is the fallback placeholder.
SPLUNK = {
    'host': os.environ.get('SPLUNK_HOST', 'MY.SPLUNK.com'),
    'scheme': os.environ.get('SPLUNK_SCHEME', 'https'),
    # Environment values are strings; the port is kept as an int.
    'port': int(os.environ.get('SPLUNK_PORT', 8089)),
    'username': os.environ.get('SPLUNK_USERNAME', 'MY_SPLUNK_USER'),
    'password': os.environ.get('SPLUNK_PASSWORD', 'MY_SPLUNK_PWD')
}
In [0]:
import pandas as pd
In [0]:
!pip install graphistry
import graphistry
#graphistry.register(**GRAPHISTRY)
graphistry.__version__
Out[0]:
In [0]:
!pip install splunk-sdk
import splunklib
In [0]:
#Connect to Splunk. Replace settings with your own setup.
# The SPLUNK settings dict is unpacked into client.connect() as keyword
# arguments. The returned object is bound to the module-level name `service`,
# which splunkToPandas() uses to create search jobs.
import splunklib.client as client
import splunklib.results as results
service = client.connect(**SPLUNK)
In [0]:
def extend(o, override):
    """Copy every entry of `override` into `o` and return `o`.

    `o` is modified in place: keys it already holds are overwritten, and the
    very same object is handed back to the caller.
    """
    o.update(override)
    return o
STEP = 10000  # number of rows requested per results page


def splunkToPandas(qry, overrides=None):
    """Run a blocking Splunk search and collect all of its rows in one DataFrame.

    Parameters
    ----------
    qry : str
        Search string handed to ``service.jobs.create``.
    overrides : dict, optional
        Extra or replacement job arguments layered over the defaults below.

    Returns
    -------
    pandas.DataFrame
        One row per result, fetched ``STEP`` rows at a time. An empty frame
        is returned when the job reports zero results, so callers can always
        use ``len()`` on the return value.
    """
    kwargs_blockingsearch = {
        "count": 0,
        "earliest_time": "2010-01-24T07:20:38.000-05:00",
        "latest_time": "now",
        "search_mode": "normal",
        "exec_mode": "blocking"
    }
    # `None` default instead of a shared mutable `{}` default.
    kwargs_blockingsearch.update(overrides or {})

    job = service.jobs.create(qry, **kwargs_blockingsearch)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Search results:\n")
    resultCount = int(job["resultCount"])
    print("results %s" % resultCount)

    pages = []
    for offset in range(0, resultCount, STEP):
        print("fetching: %s - %s" % (offset, offset + STEP))
        # Fresh dict per page, so the job-creation arguments are never mutated.
        kwargs_paginate = dict(kwargs_blockingsearch, count=STEP, offset=offset)
        # Get one page of the search results.
        blocksearch_results = job.results(**kwargs_paginate)
        reader = results.ResultsReader(blocksearch_results)
        # Per the splunklib docs the reader may yield Message objects besides
        # result dicts; only the dicts are table rows.
        rows = [row for row in reader if isinstance(row, dict)]
        pages.append(pd.DataFrame(rows))

    if not pages:
        return pd.DataFrame()
    # Concatenate once at the end rather than re-copying the growing frame
    # on every page.
    return pd.concat(pages, ignore_index=True)
In [0]:
query = 'search index="vast" srcip=* destip=* | rename destip -> dest_ip, srcip -> src_ip | fields dest_ip _time src_ip protocol | eval time=_time | fields - _* '
%time df = splunkToPandas(query, {"sample_ratio": 1000})
#df = splunkToPandasAll('search index="vast" | head 10')
#df = pd.concat([ splunkToPandas('search index="vast" | head 10'), splunkToPandas('search index="vast" | head 10') ], ignore_index=True)
print 'results', len(df)
df.sample(5)
Out[0]:
In [0]:
# Plot every row of `df` as an edge from its `src_ip` to its `dest_ip`.
(
    graphistry
    .bind(source='src_ip', destination='dest_ip')
    .edges(df)
    .plot()
)
Out[0]:
In [0]:
def make_edges(df, src, dst):
    """Return a copy of `df` carrying two extra columns, `src` and `dst`.

    The new `src` column holds the values of the column named by `src`, and
    the new `dst` column those of the column named by `dst`. Both are read
    from the input frame, which is left untouched.
    """
    # Two chained assign() calls so `src` is always added before `dst`.
    return df.assign(src=df[src]).assign(dst=df[dst])
# Two edge tables sharing the src/dst columns:
# source IP -> destination IP, and source IP -> protocol.
ip2ip = make_edges(df, src='src_ip', dst='dest_ip')
srcip2protocol = make_edges(df, src='src_ip', dst='protocol')
# Stack both tables under a fresh 0..n-1 index.
combined = pd.concat(objs=[ip2ip, srcip2protocol], ignore_index=True)
combined.sample(6)
Out[0]:
In [0]:
# Plot the stacked edge table through its shared `src` / `dst` columns.
(
    graphistry
    .bind(source='src', destination='dst')
    .edges(combined)
    .plot()
)
Out[0]:
In [0]:
# Build a hypergraph from `df` using the three listed columns as entity types.
hg = graphistry.hypergraph(df, entity_types=['src_ip', 'dest_ip', 'protocol'])
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(hg.keys())
hg['graph'].plot()
Out[0]:
In [0]:
In [0]:
# One row per distinct IP; `orig_col` records which column the IP came from.
# The src_ip rows are stacked first and drop_duplicates keeps the first
# occurrence, so an IP present in both columns is labelled 'src_ip'.
nodes = pd.concat([
    df[['src_ip']].rename(columns={'src_ip': 'id'}).assign(orig_col='src_ip'),
    df[['dest_ip']].rename(columns={'dest_ip': 'id'}).assign(orig_col='dest_ip') ],
    ignore_index=True).drop_duplicates(['id'])

#see https://labs.graphistry.com/docs/docs/palette.html
col2color = {
    "src_ip": 90005,
    "dest_ip": 46005
}

# Vectorised dictionary lookup on the column instead of a Python-level
# function call per row; this also works when `nodes` is empty.
nodes_with_color = nodes.assign(color=nodes['orig_col'].map(col2color))
# Cap the preview size so a tiny node table does not make sample() raise.
nodes_with_color.sample(min(3, len(nodes_with_color)))
Out[0]:
In [0]:
# Same IP-to-IP edges, now with the node table attached: nodes are keyed by
# `id` and their point colour is read from the `color` column.
(
    graphistry
    .bind(source='src_ip', destination='dest_ip')
    .edges(df)
    .nodes(nodes_with_color)
    .bind(node='id', point_color='color')
    .plot()
)
Out[0]:
In [0]: