In [1]:
from IPython.display import HTML
HTML('''<h1>Testing with ANN</h1>
<ol>
<li><a href='#Comparing-real-and-dummy-delays'>Comparing real and dummy delays</a></li>
</ol>
<hr/>''')
Out[1]:
In [2]:
%pylab inline
In [3]:
# Large plots
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 9
In [4]:
import numpy as np
import pandas as pd
In [5]:
from hubbub.generator.generator import Simulator
from hubbub.generator.heartbeat import HeartBeatSimulator
from hubbub.datasets.simulations import simple_log, SIMPLE_LOG as SIMPLE_LOG_SAMPLE
In [6]:
#SIMPLE_LOG = SIMPLE_LOG_SAMPLE
#SIMPLE_LOG_SAMPLE
# Generate the "real" messages dataset:
SIMPLE_LOG = simple_log(n=200, days=1)
#SIMPLE_LOG[:10]
In [7]:
result_sm = Simulator(SIMPLE_LOG).run()
results_HB = [
    HeartBeatSimulator(SIMPLE_LOG).run() for i in xrange(5)
    # HeartBeatSimulator(SIMPLE_LOG).run(delay=lambda: 5) for i in xrange(10)
]
results_HB[0][:2]
Out[7]:
In [8]:
import time
def timestamp(n):
    unix_time = time.mktime(n.timetuple()) + n.microsecond/1000000.
    return unix_time
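A quick sanity check of the helper above. Note that time.mktime interprets the broken-down time as local time; if the simulator were to emit UTC timestamps (an assumption, not stated in this notebook), a calendar.timegm-based variant would be safer. The cell below is a sketch, not part of the original run.
In [ ]:
import calendar
import datetime

# Local-time conversion, as used above.
timestamp(datetime.datetime(2015, 6, 1, 12, 0, 0, 250000))

# Hypothetical UTC-safe variant (only relevant if the log times are UTC).
def timestamp_utc(n):
    return calendar.timegm(n.timetuple()) + n.microsecond / 1000000.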
In [9]:
r_real = pd.DataFrame(
    [(0, timestamp(i[0]), 'SIMPLE_LOG', i[1]) for i in SIMPLE_LOG],
    columns=('dummy', 'timestamp', 'source', 'length'),
)
r_real.head()
Out[9]:
In [10]:
r_dummyHB = [
    pd.DataFrame(
        [(1, timestamp(i[0]), 'HB{}'.format(index), i[1]) for i in r],
        columns=('dummy', 'timestamp', 'source', 'length'),
    )
    for index, r in enumerate(results_HB)
]
r_dummyHB[0].head()
Out[10]:
In [11]:
r_mixed = [
    pd.concat((r_real, r))
    for r in r_dummyHB
]
for r in r_mixed:
    r.sort('timestamp', inplace=True)
    # Time deltas to the three previous (dm1-dm3) and three next (dp1-dp3) messages.
    r['dm3'] = r['timestamp'].diff(periods=+3)
    r['dm2'] = r['timestamp'].diff(periods=+2)
    r['dm1'] = r['timestamp'].diff(periods=+1)
    r['dp1'] = -r['timestamp'].diff(periods=-1)
    r['dp2'] = -r['timestamp'].diff(periods=-2)
    r['dp3'] = -r['timestamp'].diff(periods=-3)
r_mixed[0].head(10)
Out[11]:
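The six new columns hold the time gaps between each message and its three predecessors (dm1-dm3) and three successors (dp1-dp3): diff with a positive period looks backwards, and a negated diff with a negative period looks forwards. A minimal illustration on a toy series of timestamps (a sketch, not part of the original analysis):
In [ ]:
# Toy series of timestamps to show the windowed deltas.
toy = pd.Series([0.0, 1.0, 3.0, 6.0, 10.0])
pd.DataFrame({
    'dm1': toy.diff(periods=+1),   # gap to the previous value
    'dp1': -toy.diff(periods=-1),  # gap to the next value
})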
In [12]:
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
In [13]:
# Two classes: real messages (0) and heartbeat dummies (1).
alldata = ClassificationDataSet(6, 1, nb_classes=2)
# Skip the edge rows whose shifted diffs are NaN.
for row in r_mixed[0][:1000].dropna().iterrows():
    r = row[1]
    alldata.addSample((r.dm3, r.dm2, r.dm1, r.dp1, r.dp2, r.dp3), [r.dummy])
Randomly split the dataset into 75% training and 25% test data sets.
In [14]:
tstdata, trndata = alldata.splitWithProportion( 0.25 )
# For neural network classification, it is highly advisable to encode classes with one output neuron per class.
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )
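_convertToOneOfMany replaces the single integer target with a one-hot vector (one output neuron per class) and keeps the original label in the 'class' field. The equivalent transformation in plain numpy, shown for illustration only:
In [ ]:
# One-hot encoding of integer class labels (illustration, not used further).
labels = np.array([0, 1, 1, 0])
targets = np.zeros((len(labels), 2))
targets[np.arange(len(labels)), labels] = 1
targets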
Inspect the dataset by printing a little information about it.
In [15]:
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]
Now build a feed-forward network with 5 hidden units.
In [16]:
fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
In [17]:
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
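The trainer runs plain backpropagation with a small momentum term and L2 weight decay. As an alternative to the fixed 20-epoch loop in the next cell, pybrain's trainUntilConvergence holds out part of the training data as a validation set and stops early; it is left commented out below as a sketch, since it was not part of the original run.
In [ ]:
# Optional early-stopping alternative (sketch only, not used here):
# errors = trainer.trainUntilConvergence(maxEpochs=100, validationProportion=0.25)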
In [18]:
for i in range(20):
    trainer.trainEpochs( 1 )
    trnresult = percentError( trainer.testOnClassData(),
                              trndata['class'] )
    tstresult = percentError( trainer.testOnClassData(
                                  dataset=tstdata ), tstdata['class'] )
    print "epoch: %4d" % trainer.totalepochs, \
          " train error: %5.2f%%" % trnresult, \
          " test error: %5.2f%%" % tstresult