In [1]:
from IPython.display import HTML
HTML('''<h1>Testing with ANN</h1>
<ol>
<li><a href='#Comparing-real-and-dummy-delays'>Comparing real and dummy delays</a></li>
</ol>
<hr/>''')
Out[1]:
In [2]:
%pylab inline
In [3]:
# Large plots
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 9
In [4]:
import numpy as np
import pandas as pd
In [5]:
from hubbub.generator.generator import Simulator
from hubbub.generator.heartbeat import HeartBeatSimulator
from hubbub.datasets.simulations import simple_log, SIMPLE_LOG as SIMPLE_LOG_SAMPLE
In [6]:
#SIMPLE_LOG = SIMPLE_LOG_SAMPLE
#SIMPLE_LOG_SAMPLE
# Generate the "real" messages dataset:
SIMPLE_LOG = simple_log(n=200, days=1)
#SIMPLE_LOG[:10]
In [7]:
result_sm = Simulator(SIMPLE_LOG).run()
results_HB = [
    HeartBeatSimulator(SIMPLE_LOG).run() for i in xrange(5)
    # HeartBeatSimulator(SIMPLE_LOG).run(delay=lambda: 5) for i in xrange(10)
]
results_HB[0][:2]
Out[7]:
In [8]:
import time
def timestamp(n):
    unix_time = time.mktime(n.timetuple()) + n.microsecond/1000000.
    return unix_time
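A quick sanity check of the helper above. Note that time.mktime interprets the broken-down time as local time; if the simulator were to emit UTC timestamps (an assumption, not stated in this notebook), a calendar.timegm-based variant would be safer. The cell below is a sketch, not part of the original run.
In [ ]:
import calendar
import datetime

# Local-time conversion, as used above.
timestamp(datetime.datetime(2015, 6, 1, 12, 0, 0, 250000))

# Hypothetical UTC-safe variant (only relevant if the log times are UTC).
def timestamp_utc(n):
    return calendar.timegm(n.timetuple()) + n.microsecond / 1000000.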
In [9]:
r_real = pd.DataFrame(
    [(0, timestamp(i[0]), 'SIMPLE_LOG', i[1]) for i in SIMPLE_LOG],
    columns=('dummy', 'timestamp', 'source', 'length'),
)
r_real.head()
Out[9]:
In [10]:
r_dummyHB = [
    pd.DataFrame(
        [(1, timestamp(i[0]), 'HB{}'.format(index), i[1]) for i in r],
        columns=('dummy', 'timestamp', 'source', 'length'),
    )
    for index, r in enumerate(results_HB)
]
r_dummyHB[0].head()
Out[10]:
In [11]:
r_mixed = [
    pd.concat((r_real, r))
    for r in r_dummyHB
]
for r in r_mixed:
    r.sort('timestamp', inplace=True)
    # Time deltas to the three previous (dm1-dm3) and three next (dp1-dp3) messages.
    r['dm3'] = r['timestamp'].diff(periods=+3)
    r['dm2'] = r['timestamp'].diff(periods=+2)
    r['dm1'] = r['timestamp'].diff(periods=+1)
    r['dp1'] = -r['timestamp'].diff(periods=-1)
    r['dp2'] = -r['timestamp'].diff(periods=-2)
    r['dp3'] = -r['timestamp'].diff(periods=-3)
r_mixed[0].head(10)
Out[11]:
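The six new columns hold the time gaps between each message and its three predecessors (dm1-dm3) and three successors (dp1-dp3): diff with a positive period looks backwards, and a negated diff with a negative period looks forwards. A minimal illustration on a toy series of timestamps (a sketch, not part of the original analysis):
In [ ]:
# Toy series of timestamps to show the windowed deltas.
toy = pd.Series([0.0, 1.0, 3.0, 6.0, 10.0])
pd.DataFrame({
    'dm1': toy.diff(periods=+1),   # gap to the previous value
    'dp1': -toy.diff(periods=-1),  # gap to the next value
})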
In [12]:
from pybrain.datasets import ClassificationDataSet
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
In [13]:
# Two classes: real messages (0) and heartbeat dummies (1).
alldata = ClassificationDataSet(6, 1, nb_classes=2)
# Skip the edge rows whose shifted diffs are NaN.
for row in r_mixed[0][:1000].dropna().iterrows():
    r = row[1]
    alldata.addSample((r.dm3, r.dm2, r.dm1, r.dp1, r.dp2, r.dp3), [r.dummy])
Randomly split the dataset into 75% training and 25% test data sets.
In [14]:
tstdata, trndata = alldata.splitWithProportion( 0.25 )
# For neural network classification, it is highly advisable to encode classes with one output neuron per class.
trndata._convertToOneOfMany( )
tstdata._convertToOneOfMany( )
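_convertToOneOfMany replaces the single integer target with a one-hot vector (one output neuron per class) and keeps the original label in the 'class' field. The equivalent transformation in plain numpy, shown for illustration only:
In [ ]:
# One-hot encoding of integer class labels (illustration, not used further).
labels = np.array([0, 1, 1, 0])
targets = np.zeros((len(labels), 2))
targets[np.arange(len(labels)), labels] = 1
targets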
Inspect the dataset by printing a little information about it.
In [15]:
print "Number of training patterns: ", len(trndata)
print "Input and output dimensions: ", trndata.indim, trndata.outdim
print "First sample (input, target, class):"
print trndata['input'][0], trndata['target'][0], trndata['class'][0]
Now build a feed-forward network with 5 hidden units.
In [16]:
fnn = buildNetwork( trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer )
In [17]:
trainer = BackpropTrainer( fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
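The trainer runs plain backpropagation with a small momentum term and L2 weight decay. As an alternative to the fixed 20-epoch loop in the next cell, pybrain's trainUntilConvergence holds out part of the training data as a validation set and stops early; it is left commented out below as a sketch, since it was not part of the original run.
In [ ]:
# Optional early-stopping alternative (sketch only, not used here):
# errors = trainer.trainUntilConvergence(maxEpochs=100, validationProportion=0.25)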
In [18]:
for i in range(20):
    trainer.trainEpochs( 1 )
    trnresult = percentError( trainer.testOnClassData(),
                              trndata['class'] )
    tstresult = percentError( trainer.testOnClassData(
                                  dataset=tstdata ), tstdata['class'] )
    print "epoch: %4d" % trainer.totalepochs, \
          " train error: %5.2f%%" % trnresult, \
          " test error: %5.2f%%" % tstresult