In [43]:
%pylab inline
import os
import sys
import math
import datetime
import pandas as pd
import scipy.signal as signl
from matplotlib import dates as mdates
#--notebook-wide matplotlib defaults: math text font, font sizes, subplot spacing
for _key, _value in (
        ('mathtext.default', 'regular'),
        ('legend.fontsize', 7),
        ('axes.labelsize', 8),
        ('xtick.labelsize', 8),
        ('ytick.labelsize', 8),
        ('figure.subplot.wspace', 0.35),
        ('figure.subplot.hspace', 0.35)):
    rcParams[_key] = _value
del _key, _value  # keep the notebook namespace free of loop temporaries
#--tick locators and the month/day label format shared by the date axes
days = mdates.DayLocator()  # one tick per day
hours = mdates.HourLocator()  # one tick per hour
daysFmt = mdates.DateFormatter('%m/%d')
def set_daysFmt(ax):
    """Apply the notebook's date styling to the x axis of ``ax``.

    Major tick labels are rendered with the module-level ``daysFmt``
    formatter ('%m/%d') and minor ticks are placed with the module-level
    ``hours`` locator.  The axes object is modified in place; nothing is
    returned.
    """
    xaxis = ax.xaxis
    xaxis.set_major_formatter(daysFmt)
    xaxis.set_minor_locator(hours)
Read and parse data
In [44]:
#function to parse data and optionally normalize and detrend the data
def parse_data(fname, normalize=False):
    """Read a two-column CSV of timestamps and values into parallel arrays.

    Each data row is ``<datetime>,<value>`` with the datetime written as
    ``%m/%d/%Y %H:%M:%S``; lines starting with ``#`` are treated as comments.

    Parameters
    ----------
    fname : str
        Path of the CSV file to read.
    normalize : bool, optional
        When True, subtract the mean from the values and then remove the
        linear trend with ``scipy.signal.detrend``.  Default is False.

    Returns
    -------
    dtime : numpy.ndarray
        Object array of ``datetime.datetime``, one entry per data row.
    stage : numpy.ndarray
        Float array of the values, same length as ``dtime``.

    Raises
    ------
    ValueError
        If a timestamp does not match the expected format.
    """
    # Local import so the function no longer depends on the names that the
    # %pylab star-import happens to inject (dtype, loadtxt, array, average).
    import numpy as np

    # 'S20' is the standard spelling of the legacy 'a20' byte-string code.
    data_dtype = np.dtype([('datetime', 'S20'), ('head', 'f8')])
    raw = np.loadtxt(fname, delimiter=',', dtype=data_dtype, comments='#')
    # loadtxt collapses a file with a single data row to a 0-d record, which
    # cannot be iterated; restore the row axis so one-row files parse too.
    raw = np.atleast_1d(raw)

    def _to_datetime(text):
        # On Python 3 the 'S' field comes back as bytes, which strptime
        # rejects; on Python 2 it is already a str and is used as-is.
        if not isinstance(text, str):
            text = text.decode('ascii')
        return datetime.datetime.strptime(text.strip(), '%m/%d/%Y %H:%M:%S')

    dtime = np.array([_to_datetime(d) for d in raw['datetime']], dtype=object)
    stage = np.array(raw['head'], dtype=float)
    # Skip normalization for an empty record: there is no mean or trend to remove.
    if normalize and stage.size:
        stage -= np.average(stage)
        stage = signl.detrend(stage)
    return dtime, stage
Process two time series files with different sampling frequencies
In [45]:
#--input files, both read from the local data directory
data_dir = os.path.join('data')
file1 = 'DE_Tide.csv'
file2 = 'DE_S123H.csv'
#--parse each file into (timestamps, values) without normalization
datetime1, v1 = parse_data(
    os.path.join(data_dir, file1))
datetime2, v2 = parse_data(
    os.path.join(data_dir, file2))
#--widest date window spanned by either record, used as the x-axis limits
dt0 = min(datetime1[0], datetime2[0])
dt1 = max(datetime1[-1], datetime2[-1])
#--overlay the two raw records on a single axis
ax = subplot(1, 1, 1)
ax.plot(datetime1, v1,
        label='v1', linewidth=2.0)
ax.plot(datetime2, v2,
        label='v2', linewidth=1.0)
ax.set_xlim(dt0, dt1)
set_daysFmt(ax)
#--frameless legend placed wherever matplotlib judges best
l = ax.legend(loc='best')
l.draw_frame(False)
show()
In [46]:
#--window common to both records: latest start through earliest end
dt_min = max(datetime1[0], datetime2[0])
dt_max = min(datetime1[-1], datetime2[-1])
dt_sec = datetime.timedelta(seconds=6. * 60.)  # resampling step: 6 minutes
#--regular target timestamps from dt_min through dt_max (inclusive when the
#--step lands on it); kept as a plain list of datetime objects so cells that
#--use pdatetime_index keep working unchanged
pdatetime_index = list(pd.date_range(dt_min, dt_max, freq=dt_sec).to_pydatetime())


def _interp_to_index(series, target):
    """Return ``series`` linearly interpolated in time at the ``target`` stamps.

    The series is first expanded onto the union of its own timestamps and the
    targets (new rows are NaN), the gaps are filled with time-weighted
    interpolation, and only the target rows are kept.
    """
    target = pd.DatetimeIndex(target)
    # Index.union replaces the original set() union: an unordered Python set
    # is not a valid index, and the union result is already a DatetimeIndex.
    merged = series.index.union(target)
    filled = series.reindex(merged).sort_index().interpolate(method='time')
    # .reindex replaces the removed .ix label lookup; every target is in merged.
    return filled.reindex(target)


a = pd.Series(v1, index=pd.DatetimeIndex(datetime1))
b = pd.Series(v2, index=pd.DatetimeIndex(datetime2))
#--single-argument print(...) runs on both Python 2 and Python 3
print(a.head())
print(b.head())
c = _interp_to_index(a, pdatetime_index)
print(c.head())
d = _interp_to_index(b, pdatetime_index)
print(d.head())
In [47]:
#--compare the interpolated v2 series with the original v2 samples
figure(num=None, figsize=(10, 5), dpi=300,
       facecolor='w', edgecolor='k')
ax = subplot(1, 1, 1)
#--interpolated values drawn as a solid green line
ax.plot(pdatetime_index, d,
        color='green', linewidth=2.0,
        label='v2 interpolated')
#--original samples drawn as small black markers with no connecting line
ax.plot(datetime2, v2,
        color='black', linewidth=0.0,
        marker='o', markersize=2.5,
        label='v2')
ax.set_xlim(dt0, dt1)
set_daysFmt(ax)
#--frameless legend placed wherever matplotlib judges best
l = ax.legend(loc='best')
l.draw_frame(False)
show()
In [47]: