In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook
In [3]:
# Adapted from Pat's "Rolling Rain N-Year Threshold" notebook.
# Read the hourly O'Hare observations, convert the 'datetime' column to real
# timestamps, and index the frame by them. The index is passed as a
# DatetimeIndex object (not a column name), so the 'datetime' column is kept.
rain_df = (
    pd.read_csv('data/ohare_hourly_observations.csv')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    .pipe(lambda frame: frame.set_index(pd.DatetimeIndex(frame['datetime'])))
)

# Hourly precipitation series: the mean of 'hourly_precip' within each 1-hour bin.
chi_rain_series = rain_df['hourly_precip'].resample('1H').mean()
In [4]:
# Plot the spacing between consecutive rows' timestamps so gaps in the record
# stand out against a few reference durations.
fig, ax = plt.subplots()

# Integer index values floor-divided by 10**9 are treated as epoch seconds
# (assumes a nanosecond-resolution DatetimeIndex -- TODO confirm).
# The differences are negated, so a step backwards in time from one row to the
# next shows up as a positive value that the log-scaled axis can display.
ax.plot(np.diff(rain_df.index.astype(np.int64) // 10**9) * -1, lw=1,
        label='seconds between consecutive timestamp')

# NOTE: the former ax.hold(True) call is gone on purpose. Axes.hold was
# deprecated in matplotlib 2.0 and removed in 3.0; successive plot calls always
# draw onto the same axes.

# Read the x-limits once. Re-reading them after every reference line lets
# autoscaling widen the axis each time, so the lines end up with different lengths.
x_span = ax.get_xlim()
for level_seconds, level_label in [(60, '60 sec'),
                                   (120, '120 sec'),
                                   (3600, '1 hr'),
                                   (86400, '1 day'),
                                   (604800, '1 week')]:
    ax.plot(x_span, [level_seconds, level_seconds], '--', label=level_label)
# Restore the limits so every reference line spans the full width of the axes.
ax.set_xlim(x_span)

ax.set_yscale('log')
# Labels are attached to the artists above, so the legend cannot drift out of order.
ax.legend(loc='upper center')
ax.set_ylabel('Time in seconds (log scale)')
ax.set_xlabel('integer index of data frame')
Out[4]:
In [5]:
# Integer index values floor-divided by 10**9, treated as epoch seconds
# (assumes a nanosecond-resolution DatetimeIndex -- TODO confirm).
tm = rain_df.index.astype(np.int64) // 10**9

# Positions i where row i+1 is more than one day (3600*24 s) earlier than row i.
large_gaps = np.flatnonzero(np.diff(tm) < -3600*24)

# Show the 'Unnamed: 0' value of the rows on both sides of each such gap,
# in ascending positional order.
rain_df['Unnamed: 0'].iloc[np.sort(np.concatenate((large_gaps, large_gaps + 1)))]
Out[5]:
In [ ]: