In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Large plots
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 9  # that's default image size for this interactive session

In [3]:
import numpy as np

In [4]:
from hubbub.generator.generator import Simulator
from hubbub.generator.heartbeat import HeartBeatSimulator
from hubbub.datasets.simulations import SIMPLE_LOG


Warning: This tool is designed for Python 3.

In [5]:
SIMPLE_LOG


Out[5]:
[{'date': datetime.datetime(2000, 1, 1, 0, 1, 1)},
 {'date': datetime.datetime(2000, 1, 1, 0, 2, 9)},
 {'date': datetime.datetime(2000, 1, 1, 0, 3, 2)},
 {'date': datetime.datetime(2000, 1, 1, 0, 4, 8)},
 {'date': datetime.datetime(2000, 1, 1, 0, 5, 3)},
 {'date': datetime.datetime(2000, 1, 1, 0, 6, 7)},
 {'date': datetime.datetime(2000, 1, 1, 0, 7, 4)},
 {'date': datetime.datetime(2000, 1, 1, 0, 8, 6)},
 {'date': datetime.datetime(2000, 1, 1, 0, 9, 5)},
 {'date': datetime.datetime(2000, 1, 1, 0, 10)}]

Running simulator


In [6]:
result_sm = Simulator(SIMPLE_LOG).run()
results_hb = [
    HeartBeatSimulator(SIMPLE_LOG).run() for i in xrange(5)
#    HeartBeatSimulator(SIMPLE_LOG).run(delay=lambda: 5) for i in xrange(10)
    ]

In [7]:
results_hb[0][:5]


Out[7]:
[{'date': datetime.datetime(2000, 1, 1, 0, 0)},
 {'date': datetime.datetime(2000, 1, 1, 0, 0, 6, 337650)},
 {'date': datetime.datetime(2000, 1, 1, 0, 0, 11, 167772)},
 {'date': datetime.datetime(2000, 1, 1, 0, 0, 19, 575848)},
 {'date': datetime.datetime(2000, 1, 1, 0, 0, 24, 50139)}]

In [8]:
results_hb[0] == HeartBeatSimulator(SIMPLE_LOG).run()


Out[8]:
False

Converting results to timestamps for plotting:


In [9]:
import time
def timestamp(n):
    unix_time = time.mktime(n.timetuple())
    return unix_time

In [10]:
start_time = timestamp(result_sm[0]['date'])
remap = np.array([start_time + i*5 for i in xrange(len(result_sm))])
remap


Out[10]:
array([  9.46681200e+08,   9.46681205e+08,   9.46681210e+08, ...,
         9.46767585e+08,   9.46767590e+08,   9.46767595e+08])

In [11]:
r_real = np.array([timestamp(i['date']) for i in SIMPLE_LOG])
r_real


Out[11]:
array([  9.46681261e+08,   9.46681329e+08,   9.46681382e+08,
         9.46681448e+08,   9.46681503e+08,   9.46681567e+08,
         9.46681624e+08,   9.46681686e+08,   9.46681745e+08,
         9.46681800e+08])

Dummy messages

Simple generator.


In [12]:
r_sm = np.array([timestamp(i['date']) for i in result_sm])
figure()
plot(r_sm[:50])
plot(remap[:50])
show()


HeartBeat generator


In [13]:
r_hb = np.array([
    np.array([timestamp(i['date']) for i in r])
    for r in results_hb
    ])
type(r_hb[0])


Out[13]:
numpy.ndarray

In [14]:
figure()
plot(remap[:50])
for r in r_hb[:5]:
    plot(r[:50])
show()

figure()
for r in r_hb[:5]:
    plot(r[:50] - remap[:50])
show()

figure()
for r in r_hb[:5]:
    plot(r[1:51] - r[0:50], 'x-')
show()


Dummy messages + real messages

Simple generator


In [15]:
r_real_sm = np.concatenate((r_sm, r_real))
r_real_sm.sort()

figure()
title('real + dummy')
plot(r_real_sm[:50])
plot(remap[:50])
show()

figure()
plot(r_real_sm[:50] - remap[:50])
show()


HeartBeat generator


In [16]:
r_real_hb = [
    np.concatenate((r_real, r_i))
    for r_i in r_hb
]
for i in r_real_hb:
    i.sort()

In [17]:
figure()
title('real + hb dummy')
plot(r_real_sm[:50])
plot(remap[:100])
for i in r_real_hb:
    plot(i[:100], '-x')
legend(["simple", "remap", "heartbeat0", "heartbeat1", "heartbeat2"])
show()

#figure()
##plot(r_real_sm[:50] - remap[:50])
#for i in r_real_hb:
#    plot(i[:100] - remap[:100], '-x')
#legend(["simple", "remap", "heartbeat0", "heartbeat1", "heartbeat2"])
#show()



In [33]:
# time delta
figure()
title('hb dummy only delay')
for i in r_hb:
    plot(i[1:121] - i[0:120], 'o-')
    delays = i[1:121] - i[0:120]
    print '-', delays.mean()
    print ' ', delays.std()
show()


- 4.99166666667
  2.92545105734
- 5.2
  3.51567916625
- 5.08333333333
  3.05664122127
- 4.95
  3.25538016213
- 4.89166666667
  2.99665322578

In [35]:
# time delta
figure()
title('real + hb dummy delay')
for i in r_real_hb:
    plot(i[1:121] - i[0:120], 'o-')
    delays = i[1:121] - i[0:120]
    print '-', delays.mean()
    print ' ', delays.std()
show()


- 4.575
  2.8828877305
- 4.78333333333
  3.0827026382
- 4.79166666667
  2.95774642065
- 4.74166666667
  3.0862486218
- 4.49166666667
  2.81364482873

In [20]:
# Comparing average and simple
figure()
plot(r_real_sm[:50] - remap[:50])
plot(
    np.sum(i[:50] for i in r_real_hb) / len(r_real_hb)
    - remap[:50]
    )
show()


Resampling


In [21]:
from pandas import Series, DataFrame

In [22]:
dates_hb = [i['date'] for i in results_hb[0]]
s_hb = Series([1] * len(results_hb[0]), index=dates_hb)
s_hb.head()


Out[22]:
2000-01-01 00:00:00           1
2000-01-01 00:00:06.337650    1
2000-01-01 00:00:11.167772    1
2000-01-01 00:00:19.575848    1
2000-01-01 00:00:24.050139    1
dtype: int64

In [23]:
resampled_hb = s_hb[:1000].resample('1Min', how='sum')
plot(range(len(resampled_hb)), resampled_hb, 'o')


Out[23]:
[<matplotlib.lines.Line2D at 0x108888ad0>]

In [23]:


In [23]: