In [42]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import silk
import datetime
from struct import pack
%matplotlib inline
# Notebook-wide pandas display configuration.
try:
    # Ask pandas to restyle matplotlib output. This option only exists in
    # older pandas releases; newer ones have dropped it.
    pd.options.display.mpl_style = 'default'
except (AttributeError, KeyError):
    # pandas signals an unknown option with OptionError, which derives from
    # both AttributeError and KeyError. The style is purely cosmetic, so keep
    # going and let plots use matplotlib's own defaults instead of aborting
    # the whole configuration cell.
    pass
pd.options.display.max_rows = 100      # rows shown before a frame is truncated
pd.options.display.max_columns = 30    # columns shown before a frame is truncated
pd.options.display.width = 300         # character width used for text output
In [43]:
# Initialise the site configuration from the sample file, but only when the
# silk module reports that no site configuration is loaded yet.
if not silk.site.have_site_config():
    silk.site.init_site('samples/silk.conf')

# Every print(...) below receives exactly one string, so the calls behave the
# same under Python 2 (parenthesised expression) and Python 3 (function call).
# The explicit " " reproduces the separator that the former two-operand
# `print a, b` statement inserted, keeping the output byte-identical.
print("Sensors: " + " " + ', '.join(silk.site.sensors()))
print("Classes: " + " " + ', '.join(silk.site.classes()))
for cls in silk.site.classes():
    print("Types in class '" + cls + "': " + " " + ', '.join(silk.site.types(cls)))
print("Timezone: " + silk.get_configuration("TIMEZONE_SUPPORT"))
print("Data Root: " + silk.site.get_data_rootdir())
In [44]:
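# NOTE: this cell is left disabled because it will not run on your host
# (presumably it needs a populated SiLK data repository -- TODO confirm).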
#silkfiles=list(
# silk.site.repository_iter(start='2014/05/01:00', end='2014/05/01:03', classname='all', types=['in','inweb'] )
#)
#print 'loaded {0} files'.format(len(silkfiles))
In [45]:
# convenience method to always return a SilkFile
def open_silk(silkfile):
    """Return ``silkfile`` as a ``silk.SilkFile``.

    Parameters
    ----------
    silkfile
        Either an existing ``silk.SilkFile`` (or an instance of a subclass),
        or any value accepted by ``silk.silkfile_open`` as its first argument.

    Returns
    -------
    The very same object when it is already a ``silk.SilkFile``; otherwise the
    result of ``silk.silkfile_open(silkfile, silk.READ)``.
    """
    # isinstance (rather than an exact __class__ comparison) also recognises
    # subclasses of SilkFile instead of trying to re-open them.
    if isinstance(silkfile, silk.SilkFile):
        return silkfile
    return silk.silkfile_open(silkfile, silk.READ)
# simple projection of RWRec objects
def map_rwrec(rec):
    """Flatten one flow record into a plain dict of column values.

    Addresses are emitted twice, as text (``sip``/``dip``) and as integers
    (``sint``/``dint``); both TCP flag fields are converted with ``str``.
    ``records`` is always 1, and ``sensor`` carries the record's
    ``sensor_id``.
    """
    src_addr = rec.sip
    dst_addr = rec.dip
    projected = dict(
        stime=rec.stime,
        application=rec.application,
        protocol=rec.protocol,
        sip=str(src_addr),
        sint=int(src_addr),
        sport=rec.sport,
        dip=str(dst_addr),
        dint=int(dst_addr),
        dport=rec.dport,
        bytes=rec.bytes,
        packets=rec.packets,
        duration=rec.duration_secs,
        flags_init=str(rec.initial_tcpflags),
        flags_sess=str(rec.session_tcpflags),
        classname=rec.classname,
        typename=rec.typename,
        records=1,
        sensor=rec.sensor_id,
    )
    return projected
# lazily iterable projection
def iter_project(silkfile, func=map_rwrec):
    """Lazily yield ``func(rec)`` for every record in ``silkfile``.

    Parameters
    ----------
    silkfile
        A ``silk.SilkFile`` or anything ``open_silk`` can turn into one.
    func
        Callable applied to each record; defaults to ``map_rwrec``.

    A file that this generator opened itself is closed once iteration ends
    (or the generator is closed or garbage-collected). A SilkFile handed in
    by the caller is left open, since the caller owns it.
    """
    sf = open_silk(silkfile)
    # open_silk returns its argument unchanged when it is already a SilkFile,
    # so getting a different object back means the file was opened here.
    opened_here = sf is not silkfile
    try:
        for rec in sf:
            yield func(rec)
    finally:
        if opened_here:
            sf.close()
# convenience method for creating dataframes from silkfiles
def create_df(silkfile, func=map_rwrec):
    """Build a DataFrame from the records of ``silkfile``.

    Parameters
    ----------
    silkfile
        Passed straight to ``iter_project``.
    func
        Per-record projection, forwarded to ``iter_project``. Its output must
        contain an ``'stime'`` field, because that field becomes the index.

    Returns
    -------
    pandas.DataFrame indexed by ``'stime'``.
    """
    # Forward `func`; previously it was accepted but never passed on, so a
    # caller-supplied projection was silently replaced by the default.
    return pd.DataFrame.from_records(iter_project(silkfile, func), index='stime')
In [46]:
# Sample flow file to load into a DataFrame.
sample_file = 'samples/1mil.rwz'
# Print a timestamp before and after the load as a rough timing of create_df.
# Single-argument print(...) works identically on Python 2 and Python 3.
print(str(datetime.datetime.now()) + " " + sample_file)
frame = create_df(sample_file)
print(str(datetime.datetime.now()))
In [67]:
# Keep only the rows whose IP protocol number is 6 (TCP).
filtered = frame.loc[frame['protocol'] == 6]
In [68]:
# Display the column labels of `filtered` as this cell's output.
filtered.columns
Out[68]:
In [69]:
# Sum records, bytes and packets for each destination port.
totals = (
    filtered
    .groupby(['dport'])[['records', 'bytes', 'packets']]
    .sum()
)
In [70]:
# Draw one bar chart per column of `totals`, all sharing the x axis.
totals.plot(
    kind='bar',
    subplots=True,
    sharex=True,
    figsize=(8, 12),
)
Out[70]: