In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
import pandas as pd

In [3]:
from __future__ import division

In [4]:
res = pd.DataFrame.from_csv('results.csv', index_col='Time')

In [5]:
res['diff'] = res.timesec.diff()

In [6]:
res.head(20)


Out[6]:
queries record success timesec diff
Time
2015-09-26 17:55:19.580983 1 0 1 1.443315e+09 NaN
2015-09-26 17:55:21.596949 1 1 1 1.443315e+09 2.015966
2015-09-26 17:55:23.328721 0 2 0 1.443315e+09 1.731772
2015-09-26 17:56:23.395952 1 3 1 1.443315e+09 60.067231
2015-09-26 17:56:25.161067 1 4 1 1.443315e+09 1.765115
2015-09-26 17:56:27.000792 1 5 1 1.443315e+09 1.839725
2015-09-26 17:56:28.999071 1 6 1 1.443315e+09 1.998279
2015-09-26 17:56:30.831533 1 7 1 1.443315e+09 1.832462
2015-09-26 17:56:32.608323 1 8 1 1.443315e+09 1.776790
2015-09-26 17:56:34.423974 1 9 1 1.443315e+09 1.815651
2015-09-26 17:56:36.217108 1 10 1 1.443315e+09 1.793134
2015-09-26 17:56:37.973393 0 11 0 1.443315e+09 1.756285
2015-09-26 17:57:38.041299 1 12 1 1.443315e+09 60.067906
2015-09-26 17:57:39.767918 1 13 1 1.443315e+09 1.726619
2015-09-26 17:57:41.514982 1 14 1 1.443315e+09 1.747064
2015-09-26 17:57:43.200429 1 15 1 1.443315e+09 1.685447
2015-09-26 17:57:44.945286 1 16 1 1.443315e+09 1.744857
2015-09-26 17:57:46.661192 1 17 1 1.443315e+09 1.715906
2015-09-26 17:57:48.482668 1 18 1 1.443315e+09 1.821476
2015-09-26 17:57:50.184394 1 19 1 1.443315e+09 1.701726

In [7]:
grouped = res.groupby('success')

In [8]:
xx = grouped.get_group(0)
fails = xx.copy(deep=True)
fails['timeDelta'] = fails.timesec.diff()
yy = grouped.get_group(1)
success = yy.copy(deep=True)
success['timeDelta'] = success.timesec.diff()

In [9]:
fails.head()


Out[9]:
queries record success timesec diff timeDelta
Time
2015-09-26 17:55:23.328721 0 2 0 1.443315e+09 1.731772 NaN
2015-09-26 17:56:37.973393 0 11 0 1.443315e+09 1.756285 74.644672
2015-09-26 17:58:11.924012 0 31 0 1.443315e+09 1.744899 93.950619
2015-09-26 17:59:22.102462 0 38 0 1.443316e+09 1.703951 70.178450
2015-09-26 18:00:47.043188 0 53 0 1.443316e+09 1.656789 84.940726

In [10]:
success.head()


Out[10]:
queries record success timesec diff timeDelta
Time
2015-09-26 17:55:19.580983 1 0 1 1.443315e+09 NaN NaN
2015-09-26 17:55:21.596949 1 1 1 1.443315e+09 2.015966 2.015966
2015-09-26 17:56:23.395952 1 3 1 1.443315e+09 60.067231 61.799003
2015-09-26 17:56:25.161067 1 4 1 1.443315e+09 1.765115 1.765115
2015-09-26 17:56:27.000792 1 5 1 1.443315e+09 1.839725 1.839725

In [11]:
plt.plot(fails.timesec - fails.timesec.iloc[0], fails.timeDelta.values, 'o')
plt.axhline(fails.timeDelta.mean())


Out[11]:
<matplotlib.lines.Line2D at 0x1078ebb50>

In [12]:
res.index


Out[12]:
<class 'pandas.tseries.index.DatetimeIndex'>
[2015-09-26 17:55:19.580983, ..., 2015-09-27 00:42:09.933048]
Length: 4951, Freq: None, Timezone: None

In [13]:
fails.plot(kind='scatter',x='record', y='timeDelta')
fails.plot(y='timeDelta')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x107cb0690>

In [14]:
failure_rate = len(fails) / len(res)
print failure_rate


0.0551403756817

In [15]:
failfig, ax = plt.subplots()
fails.hist('timeDelta', bins=100, ax=ax)
ax.set_xlabel('Time Diffs (sec)')
ax.set_ylabel('Freq')
failfig.savefig('failTimeDiffs')



In [16]:
success.hist('timeDelta', bins=100)


Out[16]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x108084d10>]], dtype=object)

In [25]:
from scipy.signal import lombscargle
import numpy as np

In [50]:
periods = np.arange(0.5, 400., 0.5)
angfreq =  2 * np.pi / periods

In [51]:
power = lombscargle(res.timesec.values, res.success.values - res.success.mean(), angfreq)

In [32]:
# power *= 2.0 / (len(res) * res.success.std() ** 2)

In [52]:
# plot the results
xt = np.arange(0., 400., 20.)
fig, ax = plt.subplots()
ax.plot(periods, power)
ax.set(ylim=(0, 0.8), xlabel='period (sec)',
       ylabel='Lomb-Scargle Power');
ax.set_xticks(xt)
ax.grid(True)
# xtl = ax.get_xticklabels(visi)
# ax.set_xticklabels(xtl, rotation=30)



In [47]:
power


Out[47]:
array([ nan,  nan,  nan, ...,  nan,  nan,  nan])

In [17]:
from gatspy.periodic import LombScargle

In [40]:
model = LombScargle().fit(res.timesec.values, res.success.values, np.zeros(len(res.success.values)))

In [42]:
periods, power = model.periodogram_auto(nyquist_factor=10)