The goal here is to test an exponentially distributed delay instead of the squared-uniform one (a uniform draw on [0, 1], squared).
In [1]:
from IPython.display import HTML
# Render a static HTML table of contents as this cell's output.
HTML('''<h1>Table of Contents</h1>
<ol>
<li><a href='#Comparing-real-and-dummy-delays'>Comparison of delays between real and dummies</a></li>
</ol>
<hr/>''')
Out[1]:
In [2]:
%pylab inline
import sys, os
# Add the parent of the working directory to the import path so that the
# project package can be imported from inside the notebooks folder.
# os.path.dirname() is used instead of slicing off len('/notebooks')
# characters, which silently produced a bogus path whenever the working
# directory had a different name.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:  # avoid stacking duplicates when the cell is re-run
    sys.path.append(parent_dir)
In [3]:
# Large plots
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = (16, 9)  # (width, height) default for every figure
In [4]:
import numpy as np
import pandas as pd
In [5]:
from hubbub.generator.generator import Simulator
from hubbub.generator.heartbeat import HeartBeatSimulator
from hubbub.datasets.simulations import simple_log, gauss_log, SIMPLE_LOG as SIMPLE_LOG_SAMPLE
In [6]:
# Let's monkey patch the delay() function of the simulator with an exponential:
def delay_expo(instance):
    """Draw an exponentially distributed delay whose mean is ``instance.period``.

    Inverse-transform sampling: for u uniform on (0, 1], ``-log(u)`` follows
    an exponential distribution with mean 1, which is then scaled by the
    period.

    :param instance: object exposing a numeric ``period`` attribute.
    :return: a finite, non-negative delay.
    """
    # Alternatives tried previously:
    #return 0.5
    #return instance.period * 2 * (exp(random.random())-1)
    #return instance.period * 2 * random()**2

    # np.random.random() samples the half-open interval [0, 1); using 1 - u
    # moves the open end to 0 so that log() is never evaluated at 0, which
    # would yield an infinite delay.
    uniform = 1.0 - np.random.random()
    return instance.period * -np.log(uniform)
# Rebind the class attribute so HeartBeatSimulator.delay now refers to delay_expo.
HeartBeatSimulator.delay = delay_expo
In [7]:
#SIMPLE_LOG = SIMPLE_LOG_SAMPLE
# Display the sample log imported from hubbub.datasets.simulations.
SIMPLE_LOG_SAMPLE
Out[7]:
In [8]:
# Build the "real" message dataset and preview its first entries.
N_REAL_MESSAGES = 2000
SIMPLE_LOG = simple_log(n=N_REAL_MESSAGES)
#SIMPLE_LOG = gauss_log(n=200)
SIMPLE_LOG[:10]
Out[8]:
In [9]:
HeartBeatSimulator.period = 0.5

# One plain simulation, then five independent heartbeat simulations over the
# same log.
result_sm = Simulator(SIMPLE_LOG).run()
results_HB = []
for _ in range(5):
    # HeartBeatSimulator(SIMPLE_LOG).run(delay=lambda: 5) for i in xrange(10)
    results_HB.append(HeartBeatSimulator(SIMPLE_LOG).run())

results_HB[0][:10]
Out[9]:
In [10]:
import time
def timestamp(n):
    """Convert a datetime into a Unix timestamp in fractional seconds.

    Naive datetimes are interpreted in the machine's local timezone (via
    ``time.mktime``), exactly as before. Timezone-aware datetimes are
    converted through their UTC time tuple, so their offset is honoured
    instead of their wall-clock fields being misread as local time.

    :param n: a ``datetime.datetime``-like object (needs ``timetuple()``,
        ``tzinfo`` and ``microsecond``).
    :return: seconds since the epoch, as a float with sub-second precision.
    """
    if n.tzinfo is not None and n.utcoffset() is not None:
        import calendar  # local import: only needed for aware datetimes
        whole_seconds = calendar.timegm(n.utctimetuple())
    else:
        whole_seconds = time.mktime(n.timetuple())
    # Time tuples have one-second resolution; add the microseconds back.
    return whole_seconds + n.microsecond / 1000000.
In [11]:
# One row per real message: dummy flag 0, Unix timestamp of the entry's
# first field, and a constant source label.
real_rows = []
for entry in SIMPLE_LOG:
    real_rows.append((0, timestamp(entry[0]), 'SIMPLE_LOG'))
r_real = pd.DataFrame(real_rows, columns=('dummy', 'timestamp', 'source'))
r_real
Out[11]:
In [12]:
def _dummy_frame(index, run):
    """Build the frame of one heartbeat run: dummy flag 1, timestamps, 'HB<index>' label."""
    rows = [(1, timestamp(event[0]), 'HB{}'.format(index)) for event in run]
    return pd.DataFrame(rows, columns=('dummy', 'timestamp', 'source'))


# One frame per heartbeat simulation, in the same order as results_HB.
r_dummyHB = [_dummy_frame(index, run) for index, run in enumerate(results_HB)]
r_dummyHB[0]
Out[12]:
In [13]:
# Gaps between consecutive timestamps among the first 100 messages of the
# first heartbeat run.
figure()
plot(r_dummyHB[0]['timestamp'].head(100).diff(), 'o-')
show()

# The same gaps for every heartbeat run, overlaid on a single figure.
figure()
for r in r_dummyHB:
    gaps = r['timestamp'].head(100).diff()
    plot(gaps, 'o')
show()
In [14]:
# Merge the real messages with each heartbeat run and order the merged
# stream chronologically. DataFrame.sort() no longer exists in pandas;
# sort_values() is its replacement and returns a new, sorted frame.
r_mixed = [
    pd.concat((r_real, r)).sort_values('timestamp')
    for r in r_dummyHB
]
for r in r_mixed:
    # Time elapsed since the previous message of the merged stream
    # (NaN on the first row).
    r['before'] = r['timestamp'].diff()
    # Time remaining until the next message (NaN on the last row); the
    # backward diff is negative, hence the sign flip.
    r['after'] = -r['timestamp'].diff(periods=-1)
r_mixed[0].head(10)
Out[14]:
In [15]:
# Timestamp of the real message labelled 0, for reference.
print(r_real['timestamp'][0])

# "before" gaps of the first mixed run, then its first ten real messages.
r = r_mixed[0]
plot(r['before'])
is_real = r['dummy'] == 0
r[is_real].head(10)
Out[15]:
In [16]:
# Average delay *before* a message, per mixed run.
# Green: all messages, yellow: dummy messages only, red: real messages only;
# error bars show the standard deviation.
if False:  # switch to True to also print the raw figures
    for r in r_mixed:
        print('all mean', r['before'].mean())
        print('all std ', r['before'].std())
        print('dumm mean', r[r['dummy'] == 1]['before'].mean())
        print('dumm std ', r[r['dummy'] == 1]['before'].std())
        print('real mean', r[r['dummy'] == 0]['before'].mean())
        print('real std ', r[r['dummy'] == 0]['before'].std())
        # A bare `print` is a no-op expression under Python 3; this prints the
        # intended blank separator line on both Python 2 and Python 3.
        print('')
# Each run owns three consecutive x slots: 3k (all), 3k+1 (dummy), 3k+2 (real).
bar(
    range(0, len(r_mixed)*3, 3),
    [r['before'].mean() for r in r_mixed],
    yerr=[r['before'].std() for r in r_mixed],
    color='g',
)
bar(
    range(1, len(r_mixed)*3, 3),
    [r[r['dummy'] == 1]['before'].mean() for r in r_mixed],
    yerr=[r[r['dummy'] == 1]['before'].std() for r in r_mixed],
    color='y',
)
bar(
    range(2, len(r_mixed)*3, 3),
    [r[r['dummy'] == 0]['before'].mean() for r in r_mixed],
    yerr=[r[r['dummy'] == 0]['before'].std() for r in r_mixed],
    color='r',
)
Out[16]:
In [17]:
# Average delay *after* a message, per mixed run.
# Green: all messages, yellow: dummy messages only, red: real messages only;
# error bars show the standard deviation.
if False:  # switch to True to also print the raw figures
    for r in r_mixed:
        print('all mean', r['after'].mean())
        print('all std ', r['after'].std())
        print('dumm mean', r[r['dummy'] == 1]['after'].mean())
        print('dumm std ', r[r['dummy'] == 1]['after'].std())
        print('real mean', r[r['dummy'] == 0]['after'].mean())
        print('real std ', r[r['dummy'] == 0]['after'].std())
        # A bare `print` is a no-op expression under Python 3; this prints the
        # intended blank separator line on both Python 2 and Python 3.
        print('')
# Each run owns three consecutive x slots: 3k (all), 3k+1 (dummy), 3k+2 (real).
bar(
    range(0, len(r_mixed)*3, 3),
    [r['after'].mean() for r in r_mixed],
    yerr=[r['after'].std() for r in r_mixed],
    color='g',
)
bar(
    range(1, len(r_mixed)*3, 3),
    [r[r['dummy'] == 1]['after'].mean() for r in r_mixed],
    yerr=[r[r['dummy'] == 1]['after'].std() for r in r_mixed],
    color='y',
)
bar(
    range(2, len(r_mixed)*3, 3),
    [r[r['dummy'] == 0]['after'].mean() for r in r_mixed],
    yerr=[r[r['dummy'] == 0]['after'].std() for r in r_mixed],
    color='r',
)
Out[17]:
In [18]:
# "before" gaps of the first 100 rows of the first mixed run.
figure()
plot(r_mixed[0]['before'].head(100), 'o-')
show()

# The same for every mixed run, overlaid on one figure.
# The loop variable is deliberately still called `r`: later cells read it.
figure()
for r in r_mixed:
    plot(r['before'].head(100), 'o')
show()
In [22]:
# Sorted "before" gaps of real vs. dummy messages for one mixed run.
# `r` is pinned explicitly to the last mixed run, i.e. the frame the earlier
# `for r in r_mixed` plotting loop leaves bound on a top-to-bottom run, so
# this cell no longer depends on leftover loop state.
r = r_mixed[-1]

# Series.sort() (in place) no longer exists in pandas; sort_values() returns
# a new sorted Series, which also makes the explicit .copy() unnecessary.
distrib_real = r[r['dummy'] == 0]['before'].sort_values()
distrib_dummy = r[r['dummy'] == 1]['before'].sort_values()

# .values is plotted rather than the Series itself: matplotlib uses a Series'
# index as x coordinates, and after sorting that index is shuffled.
figure()
title('dummy messages')
plot(distrib_dummy.values, '-')
show()

figure()
title('real messages')
plot(distrib_real.values, '-')

figure()
title('dummy + adapted real messages')
plot(distrib_dummy.values, '-')
# Rescale the real curve's x positions so both curves span the same x range
# even though the two series have different lengths.
mapped_x_axis = np.arange(len(distrib_real)) * len(distrib_dummy) / float(len(distrib_real))
plot(mapped_x_axis, distrib_real.values, '-')
show()
In [20]:
# Sorted "after" gaps of real vs. dummy messages for one mixed run.
# `r` is pinned explicitly to the last mixed run, i.e. the frame the earlier
# `for r in r_mixed` plotting loop leaves bound on a top-to-bottom run, so
# this cell no longer depends on leftover loop state.
r = r_mixed[-1]

# Series.sort() (in place) no longer exists in pandas; sort_values() returns
# a new sorted Series, which also makes the explicit .copy() unnecessary.
distrib_real = r[r['dummy'] == 0]['after'].sort_values()
distrib_dummy = r[r['dummy'] == 1]['after'].sort_values()

# .values is plotted rather than the Series itself: matplotlib uses a Series'
# index as x coordinates, and after sorting that index is shuffled.
figure()
title('dummy messages')
plot(distrib_dummy.values, 'x')
show()

figure()
title('real messages')
plot(distrib_real.values, 'x')

figure()
title('dummy + adapted real messages')
plot(distrib_dummy.values, '-')
# Rescale the real curve's x positions so both curves span the same x range
# even though the two series have different lengths.
mapped_x_axis = np.arange(len(distrib_real)) * len(distrib_dummy) / float(len(distrib_real))
plot(mapped_x_axis, distrib_real.values, '-')
show()
In [21]:
# Bin the "before" gaps of the real messages of one mixed run.
# `r` is pinned explicitly to the last mixed run, i.e. the frame the earlier
# `for r in r_mixed` plotting loop leaves bound on a top-to-bottom run.
r = r_mixed[-1]
distrib_real = pd.DataFrame(r[r['dummy'] == 0]['before'].copy())
# DataFrame.sort() no longer exists in pandas and its result was being
# discarded anyway; sort by gap length and keep the sorted frame.
distrib_real = distrib_real.sort_values('before')
# pd.cut() needs 1-d input, so pass the column rather than the whole frame.
# Gaps outside (2, 10] fall in no bin and get a missing group.
distrib_real['group'] = pd.cut(distrib_real['before'], bins=[2, 4, 6, 8, 10])
#distrib_real.set_index(['group'], inplace=True)
#distrib_real.unstack('group')
#distrib_real.groupby('group')
distrib_real
In [ ]: