In [42]:
import pandas     as pd
import matplotlib.pyplot as plt
import numpy      as np
import silk
import datetime

from struct import pack

%matplotlib inline
# Notebook-wide pandas display settings: the matplotlib style option plus
# the limits on how many rows/columns and how wide a rendered frame may be.
pd.set_option('display.mpl_style', 'default')
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 30)
pd.set_option('display.width', 300)

In [43]:
if (not silk.site.have_site_config()):
    silk.site.init_site('samples/silk.conf')

print "Sensors: ", ', '.join(list(silk.site.sensors()))
print "Classes: ", ', '.join(list(silk.site.classes()))
for cls in silk.site.classes():
    print "Types in class '" + cls + "': ", ', '.join(list(silk.site.types(cls)))
print "Timezone: " + silk.get_configuration("TIMEZONE_SUPPORT")
print "Data Root: " + silk.site.get_data_rootdir()


Sensors:  S2, S1, S0
Classes:  all
Types in class 'all':  in, out, inweb, outweb, innull, outnull, int2int, ext2ext, inicmp, outicmp, other
Timezone: local
Data Root: /data

In [44]:
# NOTE: kept commented out -- this will not run on your host unless it has a SiLK repository with data for this time range
#silkfiles=list(
#    silk.site.repository_iter(start='2014/05/01:00', end='2014/05/01:03', classname='all', types=['in','inweb'] )
#)
#print 'loaded {0} files'.format(len(silkfiles))

In [45]:
# convenience method to always return a SilkFile
def open_silk(silkfile):
    """Return ``silkfile`` as an open SilkFile.

    silkfile -- either an existing silk.SilkFile, which is returned
                unchanged, or anything else, which is handed to
                silk.silkfile_open() to be opened with silk.READ
                (in this notebook, a path string).
    """
    # isinstance (rather than an exact __class__ comparison) also accepts
    # SilkFile subclasses instead of trying to re-open them as a filename.
    if isinstance(silkfile, silk.SilkFile):
        return silkfile
    return silk.silkfile_open(silkfile, silk.READ)

# simple projection of RWRec objects
def map_rwrec(rec):
    """Flatten one flow record into a plain dict, one key per output column.

    Both addresses are emitted twice: as text ('sip' / 'dip') and as
    integers ('sint' / 'dint').  The two TCP flag fields are stringified,
    'sensor' carries the record's sensor_id, and every row gets a constant
    'records' value of 1.
    """
    source_ip = rec.sip
    dest_ip = rec.dip
    columns = [
        ('stime',       rec.stime),
        ('application', rec.application),
        ('protocol',    rec.protocol),
        ('sip',         str(source_ip)),
        ('sint',        int(source_ip)),
        ('sport',       rec.sport),
        ('dip',         str(dest_ip)),
        ('dint',        int(dest_ip)),
        ('dport',       rec.dport),
        ('bytes',       rec.bytes),
        ('packets',     rec.packets),
        ('duration',    rec.duration_secs),
        ('flags_init',  str(rec.initial_tcpflags)),
        ('flags_sess',  str(rec.session_tcpflags)),
        ('classname',   rec.classname),
        ('typename',    rec.typename),
        ('records',     1),
        ('sensor',      rec.sensor_id),
    ]
    return dict(columns)

# lazily iterable projection
def iter_project(silkfile, func=map_rwrec):
    """Lazily yield ``func(rec)`` for every record read from ``silkfile``.

    silkfile -- passed through open_silk(), so either an already-open
                SilkFile or something silk.silkfile_open() accepts.
    func     -- callable applied to each record; defaults to map_rwrec.

    A file opened here on the caller's behalf is closed when the generator
    finishes or is closed early.  A SilkFile supplied by the caller is left
    open, since the caller owns it.
    """
    sf = open_silk(silkfile)
    # open_silk() hands back the very same object when it was already a
    # SilkFile; anything else means a new handle was opened just for us.
    opened_here = sf is not silkfile
    try:
        for rec in sf:
            yield func(rec)
    finally:
        # Previously the handle opened above was never closed.
        if opened_here:
            sf.close()

# convenience method for creating dataframes from silkfiles
def create_df(silkfile, func=map_rwrec):
    """Build a DataFrame, indexed by 'stime', from the records of ``silkfile``.

    silkfile -- forwarded to iter_project().
    func     -- per-record projection, forwarded to iter_project(); defaults
                to map_rwrec.  It must produce a 'stime' field, because that
                column is used as the index.
    """
    # Forward func: it used to be accepted but ignored, so a custom
    # projection silently fell back to map_rwrec.
    return pd.DataFrame.from_records(iter_project(silkfile, func), index='stime')

In [46]:
sample_file='samples/1mil.rwz'
print str(datetime.datetime.now()) + " " + sample_file
frame=create_df(sample_file)
print str(datetime.datetime.now())


2014-10-07 14:35:59.579164 samples/1mil.rwz
2014-10-07 14:36:25.101813

In [67]:
# Keep only the rows whose protocol value is 6 (TCP, assuming this column
# holds the IP protocol number -- confirm against the record source).
filtered = frame.loc[frame['protocol'] == 6]

In [68]:
# Show the column labels of the filtered frame; 'stime' is not among them
# because create_df() uses it as the index.
filtered.columns


Out[68]:
Index([u'application', u'bytes', u'classname', u'dint', u'dip', u'dport', u'duration', u'flags_init', u'flags_sess', u'packets', u'protocol', u'records', u'sensor', u'sint', u'sip', u'sport', u'typename'], dtype='object')

In [69]:
# Sum the 'records', 'bytes' and 'packets' columns for each distinct dport.
totals = (
    filtered[['dport', 'records', 'bytes', 'packets']]
    .groupby(['dport'])
    .sum()
)

In [70]:
# One bar-chart subplot per column of totals, all sharing the x axis.
totals.plot(
    kind='bar',
    subplots=True,
    sharex=True,
    figsize=(8, 12),
)


Out[70]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x10885c9d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x113dcb1d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x133a52050>], dtype=object)