In [43]:
%pylab inline
import os
import sys
import math
import datetime
import pandas as pd
import scipy.signal as signl
from matplotlib import dates as mdates
#--matplotlib defaults applied to every figure drawn below
rcParams.update({
    'mathtext.default': 'regular',
    'legend.fontsize': 7,
    'axes.labelsize': 8,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'figure.subplot.wspace': 0.35,
    'figure.subplot.hspace': 0.35,
})
#--date-axis helpers: tick locators and the month/day label format
days = mdates.DayLocator()    # every day
hours = mdates.HourLocator()  # every hour
daysFmt = mdates.DateFormatter('%m/%d')
def set_daysFmt(ax):
    """Format the x-axis of `ax` for date data.

    Major tick labels are rendered as month/day ('%m/%d') and minor ticks are
    placed every hour. The major tick locator is left unchanged.

    A new formatter and a new locator are created on every call: matplotlib
    ticker objects keep a reference to the axis they are attached to, so a
    single instance must not be shared by several axes.

    Parameters
    ----------
    ax : matplotlib axes
        Axes whose `xaxis` is configured in place.
    """
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
    ax.xaxis.set_minor_locator(mdates.HourLocator())


Populating the interactive namespace from numpy and matplotlib

Read and parse data


In [44]:
def parse_data(fname,normalize=False):
    """Read a two-column time series from a comma-delimited text file.

    Each data row holds a timestamp formatted as '%m/%d/%Y %H:%M:%S' followed
    by a floating-point value. Lines starting with '#' are treated as comments.

    Parameters
    ----------
    fname : str
        Path of the file to read.
    normalize : bool, optional
        When True, subtract the mean from the values and then remove the
        trend with `scipy.signal.detrend`. Skipped when the file has no rows.

    Returns
    -------
    dtime : ndarray
        Parsed `datetime.datetime` objects, one per data row.
    stage : ndarray
        Float values, one per data row.

    Raises
    ------
    ValueError
        If a timestamp does not match '%m/%d/%Y %H:%M:%S'.
    """
    # 'S20' is the supported spelling of a 20-byte string field; the older
    # 'a20' alias is deprecated in recent NumPy releases.
    data_dtype = dtype([('datetime','S20'),('head','f8')])
    # loadtxt collapses a single-row file to a 0-d array; atleast_1d keeps the
    # result iterable in that case.
    raw = atleast_1d(loadtxt(fname, delimiter=',', dtype=data_dtype, comments='#'))
    dtime = []
    for d in raw['datetime']:
        # The field is stored as bytes; strptime needs text on Python 3.
        if isinstance(d, bytes):
            d = d.decode('ascii')
        dtime.append(datetime.datetime.strptime(d, '%m/%d/%Y %H:%M:%S'))
    dtime = array(dtime)
    # Copy into a plain float array so later arithmetic never touches `raw`.
    stage = array(raw['head'], dtype=float)
    if normalize and stage.size:
        stage = signl.detrend(stage - average(stage))
    return dtime, stage

Process two time series files with different sampling frequencies


In [45]:
#--input files are read from the local 'data' directory
data_dir = 'data'
file1 = 'DE_Tide.csv'
file2 = 'DE_S123H.csv'
#--parse each file into (timestamps, values)
path1 = os.path.join(data_dir, file1)
path2 = os.path.join(data_dir, file2)
datetime1, v1 = parse_data(path1)
datetime2, v2 = parse_data(path2)
#--x-axis limits: earlier of the two first timestamps, later of the two last
dt0 = min(datetime1[0], datetime2[0])
dt1 = max(datetime1[-1], datetime2[-1])
#--overlay both raw series on a single axes
ax = subplot(1,1,1)
for times, values, width, tag in ((datetime1, v1, 2.0, 'v1'),
                                  (datetime2, v2, 1.0, 'v2')):
    ax.plot(times, values, linewidth=width, label=tag)
set_daysFmt(ax)
l = ax.legend(loc='best')
l.draw_frame(False)
ax.set_xlim(dt0,dt1)
show()



In [46]:
def _interp_to_index(series, target_times):
    """Return `series` linearly interpolated in time at `target_times`.

    The target timestamps are merged into the series index, the gaps are
    filled with time-weighted interpolation, and only the target timestamps
    are kept in the result.
    """
    target = pd.DatetimeIndex(target_times)
    combined = series.index.union(target)
    filled = series.reindex(combined).sort_index().interpolate(method='time')
    # Every target label exists in `combined`, so this is a pure selection.
    return filled.reindex(target)

#--time window covered by both series
dt_min = max(datetime1[0], datetime2[0])
dt_max = min(datetime1[-1], datetime2[-1])
#--common sampling interval: 6 minutes
dt_sec = datetime.timedelta(seconds=6. * 60.)
#--regular timestamps from dt_min up to dt_max, kept as a list of datetimes
pdatetime_index = list(pd.date_range(dt_min, dt_max, freq=dt_sec).to_pydatetime())
a = pd.Series(v1,index=datetime1)
b = pd.Series(v2,index=datetime2)
print(a.head())
print(b.head())

#--both series resampled onto the common 6-minute index
c = _interp_to_index(a, pdatetime_index)
print(c.head())

d = _interp_to_index(b, pdatetime_index)
print(d.head())


1999-06-24 08:36:00    0.308762
1999-06-24 08:42:00    0.301142
1999-06-24 08:48:00    0.291998
1999-06-24 08:54:00    0.283159
1999-06-24 09:00:00    0.273406
dtype: float64
1999-06-20 00:00:00    0.670566
1999-06-20 00:10:00    0.673614
1999-06-20 00:15:00    0.676662
1999-06-20 00:40:00    0.682758
1999-06-20 00:45:00    0.682758
dtype: float64
1999-06-24 08:36:00    0.308762
1999-06-24 08:42:00    0.301142
1999-06-24 08:48:00    0.291998
1999-06-24 08:54:00    0.283159
1999-06-24 09:00:00    0.273406
dtype: float64
1999-06-24 08:36:00    0.711613
1999-06-24 08:42:00    0.714458
1999-06-24 08:48:00    0.716287
1999-06-24 08:54:00    0.716287
1999-06-24 09:00:00    0.720554
dtype: float64

In [47]:
#--compare the interpolated v2 series with the original v2 samples
figure(figsize=(10, 5), dpi=300, facecolor='w', edgecolor='k')
ax = subplot(1,1,1)
interp_style = dict(linewidth=2.0, color='green', label='v2 interpolated')
sample_style = dict(linewidth=0.0, marker='o', markersize=2.5, color='black', label='v2')
ax.plot(pdatetime_index, d, **interp_style)
ax.plot(datetime2, v2, **sample_style)
set_daysFmt(ax)
l = ax.legend(loc='best')
l.draw_frame(False)
ax.set_xlim(dt0,dt1)
show()



In [47]: