In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import silk
import datetime
%matplotlib inline
# Table-display limits for DataFrame reprs rendered in this notebook.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 30)
pd.set_option('display.width', 300)

# `display.mpl_style` only exists in older pandas releases (deprecated in 0.18,
# later removed).  Setting it on a release without the option raises
# OptionError, which subclasses both AttributeError and KeyError.  Fall back to
# a matplotlib style sheet so the cell still runs.
# NOTE(review): 'ggplot' is assumed to be the closest stock look -- confirm the
# figures are acceptable with it.
try:
    pd.options.display.mpl_style = 'default'
except (AttributeError, KeyError):
    plt.style.use('ggplot')
In [2]:
# Initialise the SiLK site configuration only when
# silk.site.have_site_config() is false, so re-running the cell does not call
# init_site() again.
if not silk.site.have_site_config():
    silk.site.init_site('samples/silk.conf')

# Every print() receives one pre-built string, which is valid syntax under both
# Python 2 and Python 3.  The "{0} {1}" template reproduces the separator space
# that the two-argument print statement inserted between label and value.
print("{0} {1}".format("Sensors: ", ', '.join(silk.site.sensors())))
print("{0} {1}".format("Classes: ", ', '.join(silk.site.classes())))
for cls in silk.site.classes():
    print("{0} {1}".format("Types in class '" + cls + "': ", ', '.join(silk.site.types(cls))))
print("Timezone: " + silk.get_configuration("TIMEZONE_SUPPORT"))
print("Data Root: " + silk.site.get_data_rootdir())
In [3]:
# SiLK flow files analysed by this notebook (relative path).
silkfiles = ['samples/1mil.rwz']
# Parenthesised single-argument print is valid under both Python 2 and Python 3.
print('loaded {0} files'.format(len(silkfiles)))
In [4]:
def open_silk(silkfile):
    """Convenience helper that always returns a SilkFile.

    An object that is already a ``silk.SilkFile`` (or an instance of a
    subclass) is returned unchanged; anything else is passed to
    ``silk.silkfile_open`` together with ``silk.READ`` and the result of that
    call is returned.
    """
    # isinstance() instead of an exact __class__ comparison, so subclass
    # instances are recognised rather than being handed to silkfile_open().
    if isinstance(silkfile, silk.SilkFile):
        return silkfile
    return silk.silkfile_open(silkfile, silk.READ)
def map_rwrec(rec):
    """Project a record object onto a plain dict of selected fields.

    The returned dict has the keys 'stime', 'application', 'protocol',
    'bytes' and 'packets', each holding the attribute of the same name read
    from ``rec``.
    """
    wanted = ('stime', 'application', 'protocol', 'bytes', 'packets')
    projected = {}
    for field in wanted:
        projected[field] = getattr(rec, field)
    return projected
def iter_project(silkfile, func=map_rwrec):
    """Lazily yield ``func(rec)`` for every record iterated from ``silkfile``.

    silkfile -- anything accepted by open_silk()
    func     -- callable applied to each record; defaults to map_rwrec

    A file that this generator opened itself is closed when iteration
    finishes, raises, or the generator is closed.  A SilkFile passed in by the
    caller is left open.
    """
    sf = open_silk(silkfile)
    # open_silk() hands an existing SilkFile back unchanged, so getting a
    # different object means the handle was opened here and must be released
    # here.
    opened_here = sf is not silkfile
    try:
        for rec in sf:
            yield func(rec)
    finally:
        if opened_here:
            sf.close()
def create_df(silkfile, func=map_rwrec):
    """Build a DataFrame from the records of ``silkfile``, indexed by 'stime'.

    silkfile -- anything accepted by iter_project()
    func     -- per-record projection passed to iter_project(); because the
                frame is indexed on 'stime', each projected record must
                provide an 'stime' field

    Returns the DataFrame produced by ``pd.DataFrame.from_records``.
    """
    # Forward func explicitly: the call previously omitted it, so a custom
    # projection was silently replaced by iter_project's default.
    return pd.DataFrame.from_records(iter_project(silkfile, func), index='stime')
In [5]:
# Load the first listed SiLK file into a DataFrame using create_df's default projection.
frame = create_df(silkfile=silkfiles[0])
In [8]:
# Sum bytes and packets into one-hour bins.  The aggregation is a method call
# on the resampler because the `how=` keyword was deprecated in pandas 0.18 and
# later removed; this spelling requires pandas >= 0.18.
downsample_1hour = frame[['bytes', 'packets']].resample('1H').sum()
# Packets are drawn against a secondary y-axis.
downsample_1hour.plot(secondary_y='packets', figsize=(8, 8), title='Bytes and packets per hour')
Out[8]:
In [9]:
# Sum bytes and packets into one-minute bins.  The aggregation is a method call
# on the resampler because the `how=` keyword was deprecated in pandas 0.18 and
# later removed; this spelling requires pandas >= 0.18.  '1min' is the same
# frequency as the old '1T' alias, which newer pandas releases deprecate.
downsample_1minute = frame[['bytes', 'packets']].resample('1min').sum()
# Packets are drawn against a secondary y-axis.
downsample_1minute.plot(secondary_y='packets', figsize=(8, 8), title='Bytes and packets per minute')
Out[9]:
In [ ]: