This notebook calculates several skill scores for the SECOORA models weekly time-series saved by 00-fetch_data.ipynb.
In [1]:
import os
try:
import cPickle as pickle
except ImportError:
import pickle
run_name = '2014-07-07'
fname = os.path.join(run_name, 'config.pkl')
with open(fname, 'rb') as f:
config = pickle.load(f)
In [2]:
import numpy as np
from pandas import DataFrame, read_csv
from utilities import (load_secoora_ncs, to_html,
save_html, apply_skill)
fname = '{}-all_obs.csv'.format(run_name)
all_obs = read_csv(os.path.join(run_name, fname), index_col='name')
def rename_cols(df):
columns = dict()
for station in df.columns:
mask = all_obs['station'] == station
name = all_obs['station'][mask].index[0]
columns.update({station: name})
return df.rename(columns=columns)
The bias skill compares the model mean elevation against the observations. Because the observations were saved in NAVD datum, the deviation is usually a datum difference between the model forcings and the observations. (This can be confirmed via the constant bias observed at different runs.)
$$ \text{MB} = \mathbf{\overline{m}} - \mathbf{\overline{o}}$$
In [3]:
from utilities import mean_bias
dfs = load_secoora_ncs(run_name)
df = apply_skill(dfs, mean_bias, remove_mean=False, filter_tides=False)
df = rename_cols(df)
skill_score = dict(mean_bias=df.copy())
# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')
html = to_html(df.T)
fname = os.path.join(run_name, 'mean_bias.html'.format(run_name))
save_html(fname, html)
html
Out[3]:
In [4]:
from utilities import rmse
dfs = load_secoora_ncs(run_name)
df = apply_skill(dfs, rmse, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['rmse'] = df.copy()
# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')
html = to_html(df.T)
fname = os.path.join(run_name, 'rmse.html'.format(run_name))
save_html(fname, html)
html
Out[4]:
In [5]:
from utilities import r2
dfs = load_secoora_ncs(run_name)
df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['r2'] = df.copy()
# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')
html = to_html(df.T)
fname = os.path.join(run_name, 'r2.html'.format(run_name))
save_html(fname, html)
html
Out[5]:
In [6]:
from utilities import r2
dfs = load_secoora_ncs(run_name)
df = apply_skill(dfs, r2, remove_mean=True, filter_tides=True)
df = rename_cols(df)
skill_score['low_pass_r2'] = df.copy()
# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')
html = to_html(df.T)
fname = os.path.join(run_name, 'low_pass_r2.html'.format(run_name))
save_html(fname, html)
html
Out[6]:
In [7]:
from utilities import r2
dfs = load_secoora_ncs(run_name)
# SABGOM dt = 3 hours.
dfs = dfs.swapaxes('items', 'major').resample('3H').swapaxes('items', 'major')
df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['low_pass_resampled_3H_r2'] = df.copy()
# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')
html = to_html(df.T)
fname = os.path.join(run_name, 'low_pass_resampled_3H_r2.html'.format(run_name))
save_html(fname, html)
html
Out[7]:
In [8]:
fname = os.path.join(run_name, 'skill_score.pkl')
with open(fname,'wb') as f:
pickle.dump(skill_score, f)
In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
from utilities.taylor_diagram import TaylorDiagram
def make_taylor(samples):
fig = plt.figure(figsize=(9, 9))
dia = TaylorDiagram(samples['std']['OBS_DATA'],
fig=fig,
label="Observation")
colors = plt.matplotlib.cm.jet(np.linspace(0, 1, len(samples)))
# Add samples to Taylor diagram.
samples.drop('OBS_DATA', inplace=True)
for model, row in samples.iterrows():
dia.add_sample(row['std'], row['corr'], marker='s', ls='',
label=model)
# Add RMS contours, and label them.
contours = dia.add_contours(colors='0.5')
plt.clabel(contours, inline=1, fontsize=10)
# Add a figure legend.
kw = dict(prop=dict(size='small'), loc='upper right')
leg = fig.legend(dia.samplePoints,
[p.get_label() for p in dia.samplePoints],
numpoints=1, **kw)
return fig
In [10]:
dfs = load_secoora_ncs(run_name)
# Bin and interpolate all series to 1 hour.
freq = '1H'
for station, df in list(dfs.iteritems()):
df = df.resample(freq).interpolate().dropna(axis=1)
if 'OBS_DATA' in df:
samples = DataFrame.from_dict(dict(std=df.std(),
corr=df.corr()['OBS_DATA']))
else:
continue
samples[samples < 0] = np.NaN
samples.dropna(inplace=True)
if len(samples) <= 2: # 1 obs 1 model.
continue
fig = make_taylor(samples)
fig.savefig(os.path.join(run_name, '{}.png'.format(station)))
plt.close(fig)