In [21]:
from IPython.display import HTML
# Static table of contents rendered as rich HTML output; the link targets an
# in-notebook heading anchor.
toc_markup = '''<h1>Table of Contents</h1>
<ol>
<li><a href='#Comparing-real-and-dummy-delays'>Comparison of delays between real and dummies</a></li>
</ol>
<hr/>'''
HTML(toc_markup)
Out[21]:
In [22]:
%pylab inline
from __future__ import print_function
# Python 2/3 compatibility shim for `xrange`.
# Builtins never appear in globals(), so testing `'xrange' in globals()` is
# always False and would replace Python 2's lazy xrange with the list-building
# range. Probing the name itself only installs the alias where it is missing.
try:
    xrange
except NameError:  # Python 3: range is already lazy
    xrange = range

import os
import sys

# Add parent path: the notebook lives in a sub-directory of the project, so the
# parent of the working directory is what makes the `hubbub` package importable.
# os.path.dirname() is used instead of slicing off len('/notebooks') characters,
# which silently produced a garbage path whenever the directory had another name.
_project_root = os.path.dirname(os.getcwd())
if _project_root not in sys.path:  # keep the cell idempotent across re-runs
    sys.path.append(_project_root)
In [23]:
# Large plots: make every figure default to 16 x 9 (matplotlib's figsize is
# expressed as width, height in inches) so the charts below are readable.
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 9
In [24]:
import numpy as np
import pandas as pd
In [25]:
from hubbub.generator.generator import Simulator
from hubbub.generator.heartbeat import HeartBeatSimulator
from hubbub.datasets.simulations import simple_log, gauss_log, SIMPLE_LOG as SIMPLE_LOG_SAMPLE
In [26]:
# Display the sample log imported from hubbub.datasets.simulations.
# Uncomment the assignment to run the rest of the notebook on that sample
# instead of a freshly generated log:
#SIMPLE_LOG = SIMPLE_LOG_SAMPLE
SIMPLE_LOG_SAMPLE
Out[26]:
In [27]:
# Generating "real" messages dataset:
# this log is what the simulators below are fed and what is later labelled as
# real (dummy == 0) traffic. Presumably `n` is the number of log entries -- TODO confirm.
SIMPLE_LOG = simple_log(n=2000)
# Alternative generator kept for experimentation:
#SIMPLE_LOG = gauss_log(n=200)
# Peek at the first ten entries only, rather than dumping the whole log.
SIMPLE_LOG[:10]
Out[27]:
In [28]:
# The period is set on the class, so every HeartBeatSimulator created below
# shares it (presumably expressed in seconds -- TODO confirm).
HeartBeatSimulator.period = 0.5

# Reference run with the plain simulator over the same log.
result_sm = Simulator(SIMPLE_LOG).run()

# Five heartbeat runs over the same log.
# (Variant tried earlier: .run(delay=lambda: 5), repeated 10 times.)
results_HB = []
for _hb_run in xrange(5):
    results_HB.append(HeartBeatSimulator(SIMPLE_LOG).run())

# Show the first ten entries of the first run.
results_HB[0][:10]
Out[28]:
In [29]:
import time
def timestamp(n):
    """Convert a datetime to a float Unix timestamp with sub-second precision.

    ``time.mktime`` interprets the time tuple as local time and only carries
    whole seconds, so the microsecond component of ``n`` is added back as a
    fraction of a second.

    :param n: a ``datetime.datetime``-like object exposing ``timetuple()`` and
        ``microsecond``.
    :return: seconds since the epoch, as a float.
    """
    whole_seconds = time.mktime(n.timetuple())
    fraction = n.microsecond / 1000000.
    return whole_seconds + fraction
In [30]:
# One row per entry of the real log: dummy flag 0, the entry's first field
# converted to a float timestamp, and a constant source label.
real_rows = [
    (0, timestamp(entry[0]), 'SIMPLE_LOG')
    for entry in SIMPLE_LOG
]
r_real = pd.DataFrame(real_rows, columns=('dummy', 'timestamp', 'source'))
r_real
Out[30]:
In [31]:
# One DataFrame per heartbeat run, with the same columns as r_real: dummy flag
# 1, the event's first field as a float timestamp, and a per-run source label
# ('HB0', 'HB1', ...).
r_dummyHB = []
for hb_index, hb_events in enumerate(results_HB):
    hb_source = 'HB{}'.format(hb_index)
    hb_rows = [(1, timestamp(event[0]), hb_source) for event in hb_events]
    r_dummyHB.append(
        pd.DataFrame(hb_rows, columns=('dummy', 'timestamp', 'source'))
    )
r_dummyHB[0]
Out[31]:
In [32]:
# Gaps between consecutive timestamps over the first 100 events of the first
# heartbeat run, drawn as a connected line.
figure()
plot(r_dummyHB[0]['timestamp'].iloc[:100].diff(), 'o-')
show()

# Same gaps for every heartbeat run, overlaid as unconnected markers.
# The loop variable stays `r`, so it ends bound to the last run as before.
figure()
for r in r_dummyHB:
    plot(r['timestamp'].iloc[:100].diff(), 'o')
show()
In [33]:
# Interleave the real messages with each heartbeat run, in time order, and
# annotate every message with the gap to its neighbours:
#   'before' = time since the previous message (NaN on the first row)
#   'after'  = time until the next message     (NaN on the last row)
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement
# and returns a new frame, so nothing is mutated in place.
r_mixed = []
for hb_frame in r_dummyHB:
    mixed = pd.concat((r_real, hb_frame)).sort_values('timestamp')
    mixed['before'] = mixed['timestamp'].diff()
    # diff(periods=-1) is current - next, hence the sign flip.
    mixed['after'] = -mixed['timestamp'].diff(periods=-1)
    r_mixed.append(mixed)
r_mixed[0].head(10)
Out[33]:
In [34]:
# Timestamp of the first real message, for reference.
print(r_real['timestamp'].iloc[0])

# Gap to the previous message across the whole first mixed run.
r = r_mixed[0]
plot(r['before'])

# First ten real (dummy == 0) messages of that run.
r.loc[r['dummy'] == 0].head(10)
Out[34]:
In [35]:
# Average delay
# Mean gap to the previous message ('before'), with the standard deviation as
# error bar, for every mixed run.

# Optional text dump of the numbers behind the bars; flip the guard to True to
# print them.
if False:
    for frame in r_mixed:
        print('all mean', frame['before'].mean())
        print('all std ', frame['before'].std())
        print('dumm mean', frame[frame['dummy'] == 1]['before'].mean())
        print('dumm std ', frame[frame['dummy'] == 1]['before'].std())
        print('real mean', frame[frame['dummy'] == 0]['before'].mean())
        print('real std ', frame[frame['dummy'] == 0]['before'].std())
        # Blank separator line: with print_function active a bare `print`
        # is a no-op expression, so the call parentheses are required.
        print()

# Each run gets a cluster of three bars on x positions 3*k + slot:
# slot 0 = every message, slot 1 = dummy only, slot 2 = real only.
bar_slots = (
    (0, None, 'g', 'all messages'),
    (1, 1, 'y', 'dummy messages'),
    (2, 0, 'r', 'real messages'),
)
for slot, dummy_flag, bar_color, bar_label in bar_slots:
    slot_delays = [
        (frame if dummy_flag is None else frame[frame['dummy'] == dummy_flag])['before']
        for frame in r_mixed
    ]
    bar(
        range(slot, len(r_mixed) * 3, 3),
        [delays.mean() for delays in slot_delays],
        yerr=[delays.std() for delays in slot_delays],
        color=bar_color,
        label=bar_label,
    )
title('Mean delay before each message (error bars: standard deviation)')
ylabel('delay (s)')
legend();
Out[35]:
In [36]:
# Average delay
# Mean gap to the next message ('after'), with the standard deviation as error
# bar, for every mixed run.

# Optional text dump of the numbers behind the bars; flip the guard to True to
# print them.
if False:
    for frame in r_mixed:
        print('all mean', frame['after'].mean())
        print('all std ', frame['after'].std())
        print('dumm mean', frame[frame['dummy'] == 1]['after'].mean())
        print('dumm std ', frame[frame['dummy'] == 1]['after'].std())
        print('real mean', frame[frame['dummy'] == 0]['after'].mean())
        print('real std ', frame[frame['dummy'] == 0]['after'].std())
        # Blank separator line: with print_function active a bare `print`
        # is a no-op expression, so the call parentheses are required.
        print()

# Each run gets a cluster of three bars on x positions 3*k + slot:
# slot 0 = every message, slot 1 = dummy only, slot 2 = real only.
bar_slots = (
    (0, None, 'g', 'all messages'),
    (1, 1, 'y', 'dummy messages'),
    (2, 0, 'r', 'real messages'),
)
for slot, dummy_flag, bar_color, bar_label in bar_slots:
    slot_delays = [
        (frame if dummy_flag is None else frame[frame['dummy'] == dummy_flag])['after']
        for frame in r_mixed
    ]
    bar(
        range(slot, len(r_mixed) * 3, 3),
        [delays.mean() for delays in slot_delays],
        yerr=[delays.std() for delays in slot_delays],
        color=bar_color,
        label=bar_label,
    )
title('Mean delay after each message (error bars: standard deviation)')
ylabel('delay (s)')
legend();
Out[36]:
In [37]:
# Gap to the previous message over the first 100 messages of the first mixed
# run, drawn as a connected line.
figure()
plot(r_mixed[0]['before'].iloc[:100], 'o-')
show()

# Same quantity for every mixed run, overlaid as unconnected markers.
# The loop variable is deliberately still `r`: the following cells read it, so
# it must end up bound to the last mixed run.
figure()
for r in r_mixed:
    plot(r['before'].iloc[:100], 'o')
show()
In [38]:
# Sorted distribution of the gap to the previous message ('before'), real vs
# dummy messages.
# NOTE: `r` is whatever frame the preceding cell's loop left bound -- the last
# entry of r_mixed when the notebook is run top to bottom.
# Series.sort() no longer exists in pandas; sort_values() returns the sorted
# copy (NaN last) instead of sorting in place.
distrib_real = r.loc[r['dummy'] == 0, 'before'].sort_values()
distrib_dummy = r.loc[r['dummy'] == 1, 'before'].sort_values()

# The raw `.values` arrays are plotted so that the x axis is the rank in sorted
# order; handing matplotlib the Series would make it use the (shuffled) index
# labels as x coordinates.
figure()
title('dummy messages')
plot(distrib_dummy.values, '-')
show()

figure()
title('real messages')
plot(distrib_real.values, '-')
show()

figure()
title('dummy + adapted real messages')
plot(distrib_dummy.values, '-')
# Stretch the real-message ranks onto the dummy-message x range so that both
# curves span the same width even though the two counts differ.
mapped_x_axis = np.arange(len(distrib_real)) * len(distrib_dummy) / float(len(distrib_real))
plot(mapped_x_axis, distrib_real.values, '-')
show()
In [39]:
# Sorted distribution of the gap to the next message ('after'), real vs dummy
# messages.
# NOTE: `r` is whatever frame an earlier cell's loop left bound -- the last
# entry of r_mixed when the notebook is run top to bottom.
# Series.sort() no longer exists in pandas; sort_values() returns the sorted
# copy (NaN last) instead of sorting in place.
distrib_real = r.loc[r['dummy'] == 0, 'after'].sort_values()
distrib_dummy = r.loc[r['dummy'] == 1, 'after'].sort_values()

# The raw `.values` arrays are plotted so that the x axis is the rank in sorted
# order; handing matplotlib the Series would make it use the (shuffled) index
# labels as x coordinates.
figure()
title('dummy messages')
plot(distrib_dummy.values, '-')
show()

figure()
title('real messages')
plot(distrib_real.values, '-')
show()

figure()
title('dummy + adapted real messages')
plot(distrib_dummy.values, '-')
# Stretch the real-message ranks onto the dummy-message x range so that both
# curves span the same width even though the two counts differ.
mapped_x_axis = np.arange(len(distrib_real)) * len(distrib_dummy) / float(len(distrib_real))
plot(mapped_x_axis, distrib_real.values, '-')
show()
In [40]:
# Bin the 'before' delays of the real messages into fixed-width intervals.
# NOTE: `r` is whatever frame an earlier cell's loop left bound -- the last
# entry of r_mixed when the notebook is run top to bottom.
# Selecting with a column list keeps the result a one-column DataFrame;
# sort_values() replaces the removed DataFrame.sort(), whose result was
# discarded here anyway.
distrib_real = r.loc[r['dummy'] == 0, ['before']].sort_values('before')
# pd.cut needs 1-D input, so it is given the column rather than the frame.
# With the default right=True the bins are (2, 4], (4, 6], (6, 8], (8, 10];
# delays outside that range get a NaN group.
distrib_real['group'] = pd.cut(distrib_real['before'], bins=[2, 4, 6, 8, 10])
# TODO: aggregate per bin, e.g. distrib_real.groupby('group').size(); the bare
# groupby('group') that used to sit here built a GroupBy object and dropped it.
#distrib_real.set_index(['group'], inplace=True)
#distrib_real.unstack('group')
distrib_real
In [ ]: