In [424]:
import pandas as pd
import numpy as np
%matplotlib inline
# Read the headerless log and name its six columns by position.
df = pd.read_csv(
    "testlog.csv",
    header=None,
    names=('time', 'src', 'dst', 'sport', 'dport', 'byte'),
)
df.head()
Out[424]:
In [425]:
# Number of records (non-null `src` values) for each `time` value.
dfg = df['src'].groupby(df['time']).count()
In [426]:
# Show the first rows of the per-`time` record counts.
dfg.head()
Out[426]:
In [427]:
# Plot the record count for each `time` value.
dfg.plot()
Out[427]:
In [428]:
import numpy as np
import matplotlib.pyplot as plt
from pykalman import KalmanFilter

# The per-`time` record counts are the one-dimensional observation sequence.
observations = dfg.tolist()

# Two-component state with transition [[1, 1], [0, 1]]: at every step the first
# component is advanced by the second one (a level plus its slope).
kf = KalmanFilter(transition_matrices=np.array([[1, 1], [0, 1]]),
                  transition_covariance=0.01 * np.eye(2))

# Fit the remaining parameters once. em() updates the filter and returns it, so
# calling it separately before smooth() and before filter() would run EM twice
# and compute the two curves from different parameter estimates.
kf = kf.em(observations)
smoothed_states_pred = kf.smooth(observations)[0]
filtered_states_pred = kf.filter(observations)[0]

# Column 0 of the state estimates is the level component; plot it over the raw
# counts. No horizontal "truth value" line is drawn: the observations are record
# counts, for which the constant -0.37727 has no meaning.
plt.figure(figsize=(12, 9))
plt.plot(observations, "-xc", label="observations")
plt.plot(smoothed_states_pred[:, 0], "b", label="smoothed")
plt.plot(filtered_states_pred[:, 0], "r", label="filtered")
plt.legend()
plt.show()
In [429]:
# Records per (time, src) pair. The `src` column is counted, the same column the
# per-`time` totals count, so the shares derived from the two counts add up to 1
# within each `time` value even when some `dst` values are missing.
dfg2 = df.groupby(['time', 'src'])['src'].count()
In [430]:
# Turn the per-`time` counts into a two-column frame (`time`, `total`).
dfg = dfg.reset_index(name='total')
In [431]:
# Show the first (time, src) group counts.
dfg2.head()
Out[431]:
In [432]:
# Split the (time, src) index entries into two parallel tuples.
a, b = zip(*dfg2.index)
In [433]:
# Rebuild the grouped counts as a flat frame: one row per (time, src) pair.
dfg2 = pd.DataFrame(dict(time=a, src=b, src_cnt=dfg2.values))
In [434]:
# Attach each `time` value's total to every per-source row.
df2 = dfg2.merge(dfg, on='time')
In [435]:
# Show the first rows of the merged per-source / per-`time` counts.
df2.head()
Out[435]:
In [436]:
# Share of the `time` value's records that belong to this source.
df2['srcp'] = df2['src_cnt'] / df2['total']
In [437]:
# Check the newly added `srcp` column.
df2.head()
Out[437]:
In [438]:
# Negated base-2 logarithm of each share (the per-row information content).
df2['entropy'] = -np.log2(df2['srcp'])
In [439]:
# Check the newly added `entropy` column.
df2.head()
Out[439]:
In [440]:
# Each row's contribution to the entropy sum: share times its negated log2.
df2['pent'] = df2['srcp'].mul(df2['entropy'])
In [441]:
# Check the newly added `pent` column.
df2.head()
Out[441]:
In [442]:
# Sum the per-source contributions within each `time` value.
dfg = df2['pent'].groupby(df2['time']).sum()
In [443]:
# Plot the summed `pent` values against `time`.
dfg.plot()
Out[443]:
In [444]:
import struct
import socket
def ip2int(addr):
    """Convert a dotted-quad IPv4 string to its unsigned 32-bit integer value.

    socket.inet_aton packs the address into four bytes, and the "!I" format
    unpacks those bytes as a single big-endian unsigned integer.

    Raises whatever socket.inet_aton raises for a string it cannot parse.
    """
    return struct.unpack("!I", socket.inet_aton(addr))[0]
def int2ip(addr):
    """Convert an unsigned 32-bit integer to a dotted-quad IPv4 string.

    The value is packed as four big-endian bytes ("!I") and formatted by
    socket.inet_ntoa.

    Raises struct.error when addr does not fit an unsigned 32-bit field.
    """
    return socket.inet_ntoa(struct.pack("!I", addr))
In [445]:
# Numeric form of every source address.
df['srcn'] = df['src'].map(ip2int)
In [446]:
# Clear the low 16 bits of each numeric source address, keeping the upper 16.
df['srcn'] = df['srcn'].map(lambda addr: (addr >> 16) << 16)
# Record count for each `time` value.
dfg = df['src'].groupby(df['time']).count()
In [447]:
# Dotted-quad text of the masked address, used as the grouping key.
df['src2'] = df['srcn'].map(int2ip)
In [448]:
# Records per (time, src2) pair. The `src` column is counted, the same column the
# per-`time` totals count, so the shares derived from the two counts add up to 1
# within each `time` value even when some `dst` values are missing.
dfg2 = df.groupby(['time', 'src2'])['src'].count()
In [449]:
# Show the first (time, src2) group counts.
dfg2.head()
Out[449]:
In [450]:
# Split the (time, src2) index entries into two parallel tuples.
a, b = zip(*dfg2.index)
In [451]:
# Flat frame with one row per (time, src2) pair; the `src` column here holds the
# masked address taken from the group index.
dfg2 = pd.DataFrame(dict(time=a, src=b, src_cnt=dfg2.values))
In [452]:
# Turn the per-`time` counts into a two-column frame (`time`, `total`).
dfg = dfg.reset_index(name='total')
In [453]:
# Attach each `time` value's total to every masked-source row and preview it.
df2 = dfg2.merge(dfg, on='time')
df2.head()
Out[453]:
In [454]:
# Share of the `time` value's records held by each masked source.
df2['srcp'] = df2['src_cnt'] / df2['total']
# Negated base-2 logarithm of that share.
df2['entropy'] = -np.log2(df2['srcp'])
# Per-row contribution, then the sum of contributions within each `time` value.
df2['pent'] = df2['srcp'] * df2['entropy']
dfg = df2['pent'].groupby(df2['time']).sum()
In [455]:
# Plot the per-`time` sum of `pent` computed from the `src2` groups.
dfg.plot()
Out[455]:
In [456]:
# Per-`time` entropy of the source addresses after clearing their low 24 bits.

# Numeric address with only the upper 8 bits kept, and its dotted-quad text,
# which serves as the grouping key.
df['srcn'] = df.src.apply(ip2int).apply(lambda x: x >> 24 << 24)
df['src2'] = df.srcn.apply(int2ip)

# Records per `time` value (denominator) and per (time, src2) pair (numerator).
# Both count the `src` column, so a missing `dst` value cannot make the shares
# within a `time` value add up to less than 1.
dfg = df.groupby('time')['src'].count()
dfg2 = df.groupby(['time', 'src2'])['src'].count()

# Flatten both results into plain frames and join them on `time`. The `src`
# column of the flattened frame holds the masked address.
a, b = zip(*dfg2.index.tolist())
dfg2 = pd.DataFrame({'time': a, 'src': b, 'src_cnt': dfg2.values})
dfg = pd.DataFrame({'time': dfg.index, 'total': dfg.values})
df2 = pd.merge(dfg2, dfg, on='time')

# srcp is the share p of the `time` value's records; entropy is -log2(p);
# pent is p * -log2(p), whose sum per `time` value is the entropy in bits.
df2['srcp'] = df2.src_cnt / df2.total
df2['entropy'] = -np.log2(df2.srcp)
df2['pent'] = df2.srcp * df2.entropy
dfg = df2.groupby('time')['pent'].sum()

dfg.plot()
df2.head()
Out[456]:
In [457]:
# Per-`time` entropy of the source addresses after clearing their low 31 bits.

# Only the top bit of each address survives the shift, so every source falls
# into one of two keys: 0.0.0.0 or 128.0.0.0.
df['srcn'] = df.src.apply(ip2int).apply(lambda x: x >> 31 << 31)
df['src2'] = df.srcn.apply(int2ip)

# Records per `time` value (denominator) and per (time, src2) pair (numerator).
# Both count the `src` column, so a missing `dst` value cannot make the shares
# within a `time` value add up to less than 1.
dfg = df.groupby('time')['src'].count()
dfg2 = df.groupby(['time', 'src2'])['src'].count()

# Flatten both results into plain frames and join them on `time`. The `src`
# column of the flattened frame holds the masked address.
a, b = zip(*dfg2.index.tolist())
dfg2 = pd.DataFrame({'time': a, 'src': b, 'src_cnt': dfg2.values})
dfg = pd.DataFrame({'time': dfg.index, 'total': dfg.values})
df2 = pd.merge(dfg2, dfg, on='time')

# srcp is the share p of the `time` value's records; entropy is -log2(p);
# pent is p * -log2(p), whose sum per `time` value is the entropy in bits.
df2['srcp'] = df2.src_cnt / df2.total
df2['entropy'] = -np.log2(df2.srcp)
df2['pent'] = df2.srcp * df2.entropy
dfg = df2.groupby('time')['pent'].sum()

dfg.plot()
df2.head()
Out[457]:
In [458]:
# Per-`time` entropy of the source addresses after clearing their low 32 bits.

# NOTE(review): ip2int yields 32-bit values, so a 32-bit shift clears every bit.
# All sources collapse to 0.0.0.0 and the summed result is zero for every `time`
# value. Presumably this is an intentional degenerate baseline; confirm.
df['srcn'] = df.src.apply(ip2int).apply(lambda x: x >> 32 << 32)
df['src2'] = df.srcn.apply(int2ip)

# Records per `time` value (denominator) and per (time, src2) pair (numerator).
# Both count the `src` column, so a missing `dst` value cannot make the shares
# within a `time` value add up to less than 1.
dfg = df.groupby('time')['src'].count()
dfg2 = df.groupby(['time', 'src2'])['src'].count()

# Flatten both results into plain frames and join them on `time`. The `src`
# column of the flattened frame holds the masked address.
a, b = zip(*dfg2.index.tolist())
dfg2 = pd.DataFrame({'time': a, 'src': b, 'src_cnt': dfg2.values})
dfg = pd.DataFrame({'time': dfg.index, 'total': dfg.values})
df2 = pd.merge(dfg2, dfg, on='time')

# srcp is the share p of the `time` value's records; entropy is -log2(p);
# pent is p * -log2(p), whose sum per `time` value is the entropy in bits.
df2['srcp'] = df2.src_cnt / df2.total
df2['entropy'] = -np.log2(df2.srcp)
df2['pent'] = df2.srcp * df2.entropy
dfg = df2.groupby('time')['pent'].sum()

dfg.plot()
df2.head()
Out[458]:
In [459]:
# Record count for each source address over the whole log.
dfg = df['src'].groupby(df['src']).count()
# One row per source; `total` repeats the overall record count on every row.
df2 = pd.DataFrame(dict(src=dfg.index, cnt=dfg.values, total=dfg.values.sum()))
# Share of all records, its negated base-2 logarithm, and their product.
df2['srcp'] = df2['cnt'] / df2['total']
df2['entropy'] = -np.log2(df2['srcp'])
df2['pent'] = df2['srcp'] * df2['entropy']
In [460]:
# Show the first rows of the per-source table.
df2.head()
Out[460]:
In [461]:
# Plot the `entropy` column (-log2 of each source's share) in row order.
df2.entropy.plot()
Out[461]:
In [462]:
# Record count for each (src, dst) pair over the whole log.
dfg = df.groupby(['src', 'dst'])['src'].count()
# One row per pair; the `src` column is filled from the (src, dst) group index,
# and `total` repeats the overall record count on every row.
df2 = pd.DataFrame(dict(src=dfg.index, cnt=dfg.values, total=dfg.values.sum()))
# Share of all records, its negated base-2 logarithm, and their product.
df2['srcp'] = df2['cnt'] / df2['total']
df2['entropy'] = -np.log2(df2['srcp'])
df2['pent'] = df2['srcp'] * df2['entropy']
df2.head()
Out[462]:
In [463]:
# Plot the `entropy` column (-log2 of each (src, dst) pair's share) in row order.
df2.entropy.plot()
Out[463]:
In [464]:
# Plot the record count of each (src, dst) pair in row order.
df2.cnt.plot()
Out[464]:
In [ ]:
In [ ]:
In [ ]: