In [1]:
from IPython.display import HTML
HTML('''<h1>Testing with ANN</h1>

<ol>
    <li><a href='#Comparing-real-and-dummy-delays'>Comparison of delays between real and dummies</a></li>
</ol>
<hr/>''')





In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [3]:
# Large plots
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 9

In [4]:
import numpy as np
import pandas as pd

In [5]:
from hubbub.generator.generator import Simulator
from hubbub.generator.heartbeat import HeartBeatSimulator
from hubbub.datasets.simulations import simple_log, SIMPLE_LOG as SIMPLE_LOG_SAMPLE


Warning: This tool is designed for Python 3.

In [6]:
#SIMPLE_LOG = SIMPLE_LOG_SAMPLE
#SIMPLE_LOG_SAMPLE

# Generating "real" messages dataset:
SIMPLE_LOG = simple_log(n=200, days=1)
#SIMPLE_LOG[:10]

Running simulator


In [7]:
result_sm = Simulator(SIMPLE_LOG).run()
results_HB = [
    HeartBeatSimulator(SIMPLE_LOG).run() for i in xrange(5)
#    HeartBeatSimulator(SIMPLE_LOG).run(delay=lambda: 5) for i in xrange(10)
    ]

results_HB[0][:2]


Out[7]:
[(datetime.datetime(2000, 1, 1, 0, 0), 10),
 (datetime.datetime(2000, 1, 1, 0, 0, 0, 483881), 10)]

Converting results to timestamps for plotting:


In [8]:
import time
def timestamp(n):
    unix_time = time.mktime(n.timetuple()) + n.microsecond/1000000.
    return unix_time

In [9]:
r_real = pd.DataFrame(
    [(0, timestamp(i[0]), 'SIMPLE_LOG', i[1]) for i in SIMPLE_LOG],
    columns=('dummy', 'timestamp', 'source', 'length'),
    )
r_real.head()


Out[9]:
dummy timestamp source length
0 0 9.466812e+08 SIMPLE_LOG 10
1 0 9.466813e+08 SIMPLE_LOG 10
2 0 9.466839e+08 SIMPLE_LOG 10
3 0 9.466841e+08 SIMPLE_LOG 10
4 0 9.466846e+08 SIMPLE_LOG 10

5 rows × 4 columns


In [10]:
r_dummyHB = [
    pd.DataFrame(
        [(1, timestamp(i[0]), 'HB{}'.format(index), i[1]) for i in r],
        columns=('dummy', 'timestamp', 'source', 'length'),
        )
    for index, r in enumerate(results_HB)
    ]
r_dummyHB[0].head()


Out[10]:
dummy timestamp source length
0 1 9.466812e+08 HB0 10
1 1 9.466812e+08 HB0 10
2 1 9.466812e+08 HB0 10
3 1 9.466812e+08 HB0 10
4 1 9.466812e+08 HB0 10

5 rows × 4 columns

Analyzing delays

Computing deltas for n-3 to n+3


In [11]:
r_mixed = [
    pd.concat((r_real, r))
    for r in r_dummyHB
]
for r in r_mixed:
    r.sort('timestamp', inplace=True)
    r['dm3'] = r['timestamp'].diff(periods=+3)
    r['dm2'] = r['timestamp'].diff(periods=+2)
    r['dm1'] = r['timestamp'].diff(periods=+1)
    r['dp1'] = -r['timestamp'].diff(periods=-1)
    r['dp2'] = -r['timestamp'].diff(periods=-2)
    r['dp3'] = -r['timestamp'].diff(periods=-3)

r_mixed[0].head(10)


Out[11]:
dummy timestamp source length dm3 dm2 dm1 dp1 dp2 dp3
0 1 9.466812e+08 HB0 10 NaN NaN NaN 0.483881 3.798227 5.488650
1 1 9.466812e+08 HB0 10 NaN NaN 0.483881 3.314346 5.004769 6.011144
2 1 9.466812e+08 HB0 10 NaN 3.798227 3.314346 1.690423 2.696798 6.974775
3 1 9.466812e+08 HB0 10 5.488650 5.004769 1.690423 1.006375 5.284352 5.995023
4 1 9.466812e+08 HB0 10 6.011144 2.696798 1.006375 4.277977 4.988648 5.536058
5 1 9.466812e+08 HB0 10 6.974775 5.284352 4.277977 0.710671 1.258081 2.511761
6 1 9.466812e+08 HB0 10 5.995023 4.988648 0.710671 0.547410 1.801090 2.071607
7 1 9.466812e+08 HB0 10 5.536058 1.258081 0.547410 1.253680 1.524197 1.737170
8 1 9.466812e+08 HB0 10 2.511761 1.801090 1.253680 0.270517 0.483490 1.892113
9 1 9.466812e+08 HB0 10 2.071607 1.524197 0.270517 0.212973 1.621596 8.812251

10 rows × 10 columns

Creating the Neural Network


In [12]:
from pybrain.datasets            import ClassificationDataSet
from pybrain.utilities           import percentError
from pybrain.tools.shortcuts     import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules   import SoftmaxLayer

In [13]:
alldata = ClassificationDataSet(6, 1, nb_classes=3)

for row in r_mixed[0][:1000].iterrows():
    r = row[1]
    alldata.addSample((r.dm3, r.dm2, r.dm1, r.dp1, r.dp2, r.dp3), [r.dummy])

Randomly split the dataset into 75% training and 25% test data sets.


In [14]:
tstdata, trndata = alldata.splitWithProportion( 0.25 )
# For neural network classification, it is highly advisable to encode classes with one output neuron per class.
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )

Test our dataset by printing a little information about it.


In [15]:
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]


Number of training patterns:  750
Input and output dimensions:  6 3
First sample (input, target, class):
[        nan         nan  0.483881    3.31434596  5.00476897  6.01114404] [0 1 0] [ 1.]

Now build a feed-forward network with 5 hidden units.


In [16]:
fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )

In [17]:
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)

In [18]:
for i in range(20):
    trainer.trainEpochs( 1 )
    
    trnresult = percentError( trainer.testOnClassData(),
                              trndata['class'] )
    tstresult = percentError( trainer.testOnClassData(
           dataset=tstdata ), tstdata['class'] )

    print "epoch: %4d" % trainer.totalepochs, \
          "  train error: %5.2f%%" % trnresult, \
          "  test error: %5.2f%%" % tstresult


('Total error:', nan)
epoch:    1   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    2   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    3   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    4   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    5   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    6   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    7   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    8   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:    9   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   10   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   11   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   12   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   13   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   14   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   15   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   16   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   17   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   18   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   19   train error: 99.87%   test error: 99.60%
('Total error:', nan)
epoch:   20   train error: 99.87%   test error: 99.60%

In [18]: