The timeseries module provides the TimeSeries class which deals with all 1D timeseries (sizedistribution timeseries are considered 2D timeseries and are delt with in a different place). Usually when reading in any type of data, that is in some way a time series it will end up beeing a TimeSeries instance. Data in a TimeSeries is stored at TimeSeries.data in form of a pandas DataFrame instance.
In [4]:
from atmPy.general import timeseries
from atmPy.aerosols.instruments.POPS import housekeeping
from atmPy.aerosols.instruments.piccolo import piccolo
from atmPy.data_archives.arm import read_data
In [5]:
# %matplotlib nbagg
%matplotlib inline
In [7]:
from hagpack import plot_templates
In [8]:
read_data.arm_products.keys()
Out[8]:
In [9]:
fname = '/Users/htelg/data/ARM/SGP/'
out = read_data.read_cdf(fname, data_product=['noaaaos', 'aosacsm'],
time_window=('2012-02-01','2012-02-15'),
data_quality='patchy')
noaaaos = out['noaaaos']
scatt = noaaaos.scatt_coeff._del_all_columns_but('Bs_G_Dry_1um_Neph3W_1')
acsm = out['aosacsm']
acsm = acsm.mass_concentrations._del_all_columns_but('total')
In [13]:
dt = (np.arange(0,200,5, dtype=int), 'm')
dt
Out[13]:
In [17]:
1000/60
Out[17]:
In [22]:
dt = (np.unique(np.logspace(np.log10(1), np.log10(10000), 50, dtype=int)), 'm')
dt
Out[22]:
In [23]:
roll_n = acsm.rolling(#.data['Bs_G_Dry_1um_Neph3W_1'],
(1, 'D'),
min_good_ratio = 0.1,)
out, dt_max = roll_n.corr_timelag(scatt, dt = dt, no_of_steps=40, center = 200,
normalize = True)
corr = roll_n.corr(scatt)
In [29]:
a = plot_templates.plot_rolling_time_laps_corr(out, dt_max, clim = (0,1))
a.set_yscale('log')
In [57]:
def aaa(line):
if (np.isnan(line)).sum() > ((~np.isnan(line)).sum() * 0.3):
return np.nan
col_t = cols.copy()
lt = line[~ np.isnan(line)]
col_t = col_t[~ np.isnan(line)]
argmax = lt.argmax()
realMax = col_t[argmax]
return realMax
cols = out.data.columns
dt_max = np.apply_along_axis(aaa, 1, out.data.values)
# values = out.data.iloc[0].values
# np.argmax(values)
In [58]:
plt.plot(dt_max)
Out[58]:
In [31]:
out.data.iloc[10,:]
Out[31]:
In [25]:
dt_max
Out[25]:
In [21]:
plt.plot(dt_max)
Out[21]:
In [15]:
hasattr(np.array([654]), '__len__')
Out[15]:
In [ ]:
In [276]:
%%time
scatt_alig = scatt.align_to(acsm)
roll = scatt_alig.data['Bs_G_Dry_1um_Neph3W_1'].rolling(30, min_periods=1, center = True)
out = roll.corr(acsm.data['total'])
# td = np.timedelta64(15*1860,'s')
# out.index -= td
In [277]:
%%time
roll = acsm.rolling(scatt, (1800 * 30, 's') , min_good_ratio=0.04)
out_old = roll.correlation()
In [278]:
f,a = plt.subplots()
out.plot(ax = a, label = 'new')
out_old.plot(ax = a, label = 'old')
out_n.plot(ax = a, label = 'newnew')
a.legend()
# a.set_ylim((0,2))
Out[278]:
In [88]:
%%time
scatt_alig = scatt.align_to(acsm)
roll = scatt_alig.data['Bs_G_Dry_1um_Neph3W_1'].rolling(30, min_periods=1, center = True)
out = roll.corr(acsm.data['total'])
# td = np.timedelta64(15*1860,'s')
# out.index -= td
In [ ]:
pd.core.window.Rolling()
In [ ]:
roll.corr()
In [395]:
from hagpack import plot_templates
In [446]:
class Rolling(pd.core.window.Rolling):
def __init__(self, obj, window, min_good_ratio=0.67,
verbose = True,
**kwargs):
self.data = obj
window = np.timedelta64(window[0], window[1])
window = int(window / np.timedelta64(int(obj._data_period), 's'))
min_periods = int(window * min_good_ratio)
if obj.data.columns.shape[0] == 1:
self._data_column = obj.data.columns[0]
else:
txt = 'please make sure the timeseries has only one collumn'
raise ValueError(txt)
if verbose:
print('Each window contains %s data points of which at least %s are not nan.' % (window,
min_periods))
super().__init__(obj.data[self._data_column],
window,
min_periods = min_periods,
**kwargs)
# print(self.window)
def corr(self, other, *args, **kwargs):
if other.data.columns.shape[0] == 1:
other_column = other.data.columns[0]
else:
txt = 'please make sure the timeseries has only one collumn'
raise ValueError(txt)
# other_column = 'Bs_G_Dry_1um_Neph3W_1'
other = other.align_to(self.data)
other = other.data[other_column]
corr_res = super().corr(other, *args, **kwargs)
corr_res_ts = timeseries.TimeSeries(pd.DataFrame(corr_res))
corr_res_ts._data_period = self.data._data_period
return corr_res_ts
def corr_timelag(self, other, dt = (5, 'm'), no_of_seps = 10, center = 0, normalize = True, **kwargs):
if other.data.columns.shape[0] == 1:
other_column = other.data.columns[0]
else:
txt = 'please make sure the timeseries has only one collumn'
raise ValueError(txt)
# other = acsm.copy()
# dt = (5, 'm')
# no_of_seps = 10
# center = 0
dt_array = np.arange(0, dt[0] * no_of_seps, dt[0]) - int(no_of_seps * dt[0]/2)
out = False
for dtt in dt_array:
tst = other.copy()
tst.data.index += np.timedelta64(int(dtt),dt[1])
corr = self.corr(tst)
if not out:
out = corr
out.data.columns = [dtt]
else:
out.data[dtt] = corr.data#[self._data_column]
if normalize:
out = timeseries.TimeSeries_2D(out.data.apply(lambda line: line/line.max(), axis = 1))
else:
out = timeseries.TimeSeries_2D(out.data)
cols = out.data.columns
dt_max = np.apply_along_axis(lambda arr: cols[arr.argmax()], 1, out.data.values)
dt_max = timeseries.TimeSeries(pd.DataFrame(dt_max, index = out.data.index))
if dt[1] == 'm':
ylt = 'min.'
else:
ylt = dt[1]
dt_max._y_label = 'Time lag (%s)'%ylt
return out, dt_max
In [447]:
roll_n = Rolling(acsm, #.data['Bs_G_Dry_1um_Neph3W_1'],
(1, 'D'),
min_good_ratio = 0.1,
center = True)
out, dt_max = roll_n.corr_timelapse(scatt, dt = (10, 'm'), no_of_seps=40,
normalize = True)
corr = roll_n.corr(scatt)
In [448]:
plot_templates.plot_rolling_time_laps_corr(out, dt_max, corr)
Out[448]:
In [ ]:
###### f,a,_,_ =out.plot()
dt_max.plot(ax = a)
corr.plot(ax = a)
In [ ]:
In [279]:
%%time
roll_n = Rolling(acsm, #.data['Bs_G_Dry_1um_Neph3W_1'],
(1800 * 30, 's'),
min_good_ratio = 0.1,
center = True)
out_n = roll_n.corr(scatt)
In [339]:
%%time
roll = acsm.rolling(scatt, (1800 * 30, 's') , min_good_ratio=0.04)
tl_corr_old = roll.timelaps_correlation()
In [340]:
tl_corr_old[1].plot()
Out[340]:
In [280]:
f,a = plt.subplots()
out.plot(ax = a)
out_n.plot(ax = a )
Out[280]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [64]:
scatt_avg = scatt.average_time((3,'h'))
scatt_avg_n = scatt.average_time_new((3,'h'))
In [65]:
scatt_align = scatt.align_to(scatt_avg)
scatt_align_n = scatt.align_to_new(scatt_avg_n)
In [10]:
f,a = plt.subplots()
scatt.plot(ax = a)
scatt_avg.plot(ax = a)
scatt_avg_n.plot(ax = a)
g = a.get_lines()[-1]
g.set_drawstyle('steps-post')
# scatt_align.plot(ax = a)
scatt_align_n.plot(ax = a)
g = a.get_lines()[-1]
g.set_drawstyle('steps-post')
In [ ]:
In [3]:
read_data.arm_products.keys()
Out[3]:
In [4]:
fname = '/Users/htelg/data/ARM/SGP/'
out = read_data.read_cdf(fname, data_product=['tdmaapssize', 'aosacsm'],
time_window=('2012-02-01','2012-02-10'),
data_quality='patchy')
tdma = out['tdmaapssize']
acsm = out['aosacsm']
In [5]:
tv = tdma.size_distribution.particle_volume_concentration
av = acsm.mass_concentrations._del_all_columns_but('total')
In [6]:
roll = tv.rolling(av,(5,'D'))
tlc_max, tlc_tlc = roll.timelaps_correlation(no_of_steps=20)
In [7]:
f,a,_,_ = tlc_tlc.plot()
tlc_max.plot(ax = a)
Out[7]:
In [20]:
%%timeit
a = np.zeros(3) * np.nan
a
In [21]:
%%timeit
a = np.zeros(3)
a[:] = np.nan
a
In [14]:
a
Out[14]:
In [ ]:
fname = '/Users/htelg/data/ARM/SGP/sgpnoaaaosC1.b1.20120102.000000.cdf'
ts = read_data.read_cdf(fname).scatt_coeff
In [ ]:
ts = ts._del_all_columns_but('Bs_G_Dry_1um_Neph3W_1')
In [ ]:
ts.plot()
Usually a TimeSeries is generated when data representing a timeseries is loaded, e.g. POPS housekeeping data, or piccolo telemetry data.
In [6]:
pops_hk = housekeeping.read_csv('./data/POPS_housekeeping.csv')
picco_tel = piccolo.read_csv('./data/piccolo.log')
In [ ]:
fname = '/Users/htelg/data/ARM/SGP/sgpnoaaaosC1.b1.20120102.0000'
ts = read_data.read_cdf(fname).scatt_coeff
In [ ]:
ts = ts._del_all_columns_but('Bs_G_Dry_1um_Neph3W_1')
In [ ]:
ts.plot()
In [ ]:
bla = ts.plot_wrapped(period=5, max_wraps=23)
This attribute of a TimeSeries class allows the projection of a different instance on the current one. The resulting instance will have the collumns of both instances but with the time axes according to the first instance.
In [ ]:
pops_hk_merged = pops_hk.merge(picco_tel)
# pops_hk_merged.data
After merging it is possible to plot for example the Particle rate as a function of altitude. Columns which where previously in different Timeseries with different time intervals and time values.
In [ ]:
f,ax = plt.subplots()
ax.plot(pops_hk_merged.data.Particle_rate_nops, pops_hk_merged.data.Altitude)
ax.set_ylabel('Altitude (m)')
ax.set_xlabel('Particle rate (#/s)')
When a TimeSeries instance has data columns named 'Lat' and 'Lon' it is possible to do a quick drowing of e.g. fligh path on a map. This uses matplotlib basemap.
In [ ]:
pops_hk_merged.plot_map(resolution = 'f')
In [ ]:
data_product = ['aosacsm', 'noaaaos']
time_window=('2012-01-01', '2012-04-01')
folder = '/Users/htelg/data/ARM/SGP/'
out = read_data.read_cdf(folder, data_product=data_product, time_window=time_window, data_quality='good', verbose=False)
aosacsm = out['aosacsm']
noaaaos = out['noaaaos']
acsm_tm = timeseries.TimeSeries(pd.DataFrame(aosacsm.mass_concentrations.data['total']))
acsm_tm._data_period = aosacsm._data_period
noaa_scatt = timeseries.TimeSeries(pd.DataFrame(noaaaos.scatt_coeff.data['Bs_G_Dry_1um_Neph3W_1']))
noaa_scatt._data_period = noaaaos._data_period
In [ ]:
out = acsm_tm.correlate_to(noaa_scatt)
out.plot_pearsonANDoriginal_data()
In [ ]:
out = acsm_tm.rolling_correlation(noaa_scatt, (7,'D'))
out.plot()