In [ ]:
%matplotlib notebook
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import utide
print(utide.__version__)
Look at the data file to see what structure it has.
In [ ]:
with open('can1998.dtf') as f:
    lines = f.readlines()
print(''.join(lines[:30]))
It looks like the fields are seconds, year, month, day, hour, elevation, flag. We need a date parser function to combine the date and time fields into a single value to be used as the datetime index.
In [ ]:
def date_parser(year, month, day, hour):
    year, month, day, hour = map(int, (year, month, day, hour))
    return datetime.datetime(year, month, day, hour)

# Names of the columns that will be used to make a "datetime" column:
parse_dates = dict(datetime=['year', 'month', 'day', 'hour'])

# Names of the original columns in the file, including only
# the ones we will use; we are skipping the first, which appears
# to be seconds from the beginning.
names = ['year', 'month', 'day', 'hour', 'elev', 'flag']

obs = pd.read_table('can1998.dtf',
                    names=names,
                    skipinitialspace=True,
                    delim_whitespace=True,
                    index_col='datetime',
                    usecols=range(1, 7),
                    na_values='9.990',
                    parse_dates=parse_dates,
                    date_parser=date_parser,
                    )
obs.head(6)
Although no elevations are marked bad via the special value (which would appear as NaN after reading the file), a flag value of 2 indicates an unreliable value. We will mark those points with NaN, calculate the deviations of the elevations from their mean (stored in a new column called "anomaly"), and then interpolate to fill in the NaN values in the anomaly.
In [ ]:
bad = obs['flag'] == 2
corrected = obs['flag'] == 1
obs.loc[bad, 'elev'] = np.nan
obs['anomaly'] = obs['elev'] - obs['elev'].mean()
obs['anomaly'] = obs['anomaly'].interpolate()
print('{} points were flagged "bad" and interpolated'.format(bad.sum()))
print('{} points were flagged "corrected" and left unchanged'.format(corrected.sum()))
The utide package works with ordinary numpy arrays, not with Pandas Series or DataFrames, so we need to make a time variable in floating-point days since a given epoch, and use the values attribute of the elevation anomaly (a Pandas Series) to extract the underlying numpy ndarray.
In [ ]:
time = mdates.date2num(obs.index.to_pydatetime())

coef = utide.solve(time, obs['anomaly'].values,
                   lat=-25,
                   method='ols',
                   conf_int='MC')
The amplitudes and phases from the fit are now in the coef data structure (a Bunch), which can be used directly in the reconstruct function to generate a hindcast or forecast of the tides at the times specified in the time array.
In [ ]:
print(coef.keys())
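For a quick look at the fit itself, the constituent names, amplitudes, and phases can be pulled out of coef directly. This is a minimal sketch assuming coef exposes parallel name, A (amplitude), and g (Greenwich phase) arrays, as in recent utide versions; it lists the ten largest constituents.
In [ ]:
# Sketch: list the ten largest constituents by amplitude.
# Assumes coef.name, coef.A, and coef.g are parallel arrays
# (constituent name, amplitude, phase), as in recent utide releases.
order = np.argsort(coef.A)[::-1][:10]
for name, A, g in zip(coef.name[order], coef.A[order], coef.g[order]):
    print('{:>6s}  A = {:7.4f}  g = {:7.2f}'.format(name, A, g))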
In [ ]:
tide = utide.reconstruct(time, coef)
The output from the reconstruction is also a Bunch:
In [ ]:
print(tide.keys())
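The reconstructed series in tide.h is an ordinary numpy array aligned element-by-element with the input time array, so it can be compared directly with the observed anomaly. A quick sanity check (assuming the h field shown in the keys above):
In [ ]:
# Sketch: confirm the reconstruction has one value per input time,
# assuming tide.h holds the reconstructed heights as printed above.
print(tide.h.shape, time.shape)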
In [ ]:
#t = obs.index.values # dtype is '<M8[ns]' (numpy datetime64)
# It is more efficient to supply the time directly as matplotlib
# datenum floats:
t = tide.t_mpl
fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, sharey=True, sharex=True)
ax0.plot(t, obs.anomaly, label=u'Observations', color='C0')
ax1.plot(t, tide.h, label=u'Tide Fit', color='C1')
ax2.plot(t, obs.anomaly - tide.h, label=u'Residual', color='C2')
ax2.xaxis_date()
fig.legend(ncol=3, loc='upper center')
fig.autofmt_xdate()
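As a rough follow-up check (not part of the original analysis), we can compare the spread of the observed anomaly with the spread of the residual to see how much of the variance the tidal fit accounts for.
In [ ]:
# Sketch: compare the spread of the observed anomaly with the spread
# of the residual (observed minus reconstructed tide).
resid = obs['anomaly'].values - tide.h
print('std(anomaly)  = {:.4f}'.format(obs['anomaly'].std()))
print('std(residual) = {:.4f}'.format(resid.std()))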