The Boston Light Swim

Sea Surface Temperature time-series model skill

Load configuration


In [1]:
import os
try:
    import cPickle as pickle
except ImportError:
    import pickle


run_name = '2015-08-17'
fname = os.path.join(run_name, 'config.pkl')
with open(fname, 'rb') as f:
    config = pickle.load(f)

In [2]:
import numpy as np
from pandas import DataFrame, read_csv
from utilities import to_html, save_html, apply_skill


fname = '{}-all_obs.csv'.format(run_name)
all_obs = read_csv(os.path.join(run_name, fname), index_col='name')


def rename_cols(df):
    columns = dict()
    for station in df.columns:
        mask = all_obs['station'].astype(str) == station
        name = all_obs['station'][mask].index[0]
        columns.update({station: name})
    return df.rename(columns=columns)

In [3]:
from glob import glob
from pandas import Panel
from utilities import nc2df


def load_ncs(run_name):
    fname = '{}-{}.nc'.format
    ALL_OBS_DATA = nc2df(os.path.join(run_name,
                                      fname(run_name, 'OBS_DATA')))
    index = ALL_OBS_DATA.index
    dfs = dict(OBS_DATA=ALL_OBS_DATA)
    for fname in glob(os.path.join(run_name, "*.nc")):
        if 'OBS_DATA' in fname:
            continue
        else:
            model = fname.split('.')[0].split('-')[-1]
            df = nc2df(fname)
            # FIXME: Horrible work around duplicate times.
            if len(df.index.values) != len(np.unique(df.index.values)):
                kw = dict(subset='index', take_last=True)
                df = df.reset_index().drop_duplicates(**kw).set_index('index')
            kw = dict(method='time', limit=30)
            df = df.reindex(index).interpolate(**kw).ix[index]
            dfs.update({model: df})

    return Panel.fromDict(dfs).swapaxes(0, 2)

Skill 1: Model Bias (or Mean Bias)

The bias skill compares the model mean temperature against the observations. It is possible to introduce a Mean Bias in the model due to a mismatch of the boundary forcing and the model interior.

$$ \text{MB} = \mathbf{\overline{m}} - \mathbf{\overline{o}}$$

In [4]:
from utilities import mean_bias

dfs = load_ncs(run_name)

df = apply_skill(dfs, mean_bias, remove_mean=False, filter_tides=False)
df = rename_cols(df)
skill_score = dict(mean_bias=df.copy())

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'mean_bias.html'.format(run_name))
save_html(fname, html)
html


Out[4]:
COAWST_4 G1SST__SST HYCOM NECOFS_FVCOM NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston 0.78 0.37 2.08 8.16 0.60
Buoy A01 0.75 0.39 0.98 9.14 0.78
Boston, MA 5.68 0.45 -- 3.84 0.92

Skill 2: Central Root Mean Squared Error

Root Mean Squared Error of the deviations from the mean.

$$ \text{CRMS} = \sqrt{\left(\mathbf{m'} - \mathbf{o'}\right)^2}$$

where: $\mathbf{m'} = \mathbf{m} - \mathbf{\overline{m}}$ and $\mathbf{o'} = \mathbf{o} - \mathbf{\overline{o}}$


In [5]:
from utilities import rmse

dfs = load_ncs(run_name)

df = apply_skill(dfs, rmse, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['rmse'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'rmse.html'.format(run_name))
save_html(fname, html)
html


Out[5]:
COAWST_4 G1SST__SST HYCOM NECOFS_FVCOM NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston 0.70 0.34 0.72 0.58 0.73
Buoy A01 0.78 0.46 0.70 0.71 0.67
Boston, MA 0.43 0.55 -- 0.43 0.40

In [6]:
from utilities import r2

dfs = load_ncs(run_name)

df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['r2'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'r2.html'.format(run_name))
save_html(fname, html)
html


Out[6]:
COAWST_4 G1SST__SST HYCOM NECOFS_FVCOM NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston -0.31 -0.03 -0.42 0.08 -0.21
Buoy A01 -0.54 -0.19 -0.26 -0.30 -0.00
Boston, MA 0.05 -0.11 -- 0.05 -0.04

In [7]:
fname = os.path.join(run_name, 'skill_score.pkl')
with open(fname,'wb') as f:
    pickle.dump(skill_score, f)

Normalized Taylor diagrams

The radius is model standard deviation error divided by observations deviation, azimuth is arc-cosine of cross correlation (R), and distance to point (1, 0) on the abscissa is Centered RMS.


In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
from utilities.taylor_diagram import TaylorDiagram


def make_taylor(samples):
    fig = plt.figure(figsize=(9, 9))
    dia = TaylorDiagram(samples['std']['OBS_DATA'],
                        fig=fig,
                        label="Observation")
    colors = plt.matplotlib.cm.jet(np.linspace(0, 1, len(samples)))
    # Add samples to Taylor diagram.
    samples.drop('OBS_DATA', inplace=True)
    for model, row in samples.iterrows():
        dia.add_sample(row['std'], row['corr'], marker='s', ls='',
                       label=model)
    # Add RMS contours, and label them.
    contours = dia.add_contours(colors='0.5')
    plt.clabel(contours, inline=1, fontsize=10)
    # Add a figure legend.
    kw = dict(prop=dict(size='small'), loc='upper right')
    leg = fig.legend(dia.samplePoints,
                     [p.get_label() for p in dia.samplePoints],
                     numpoints=1, **kw)
    return fig

In [9]:
dfs = load_ncs(run_name)

# Bin and interpolate all series to 1 hour.
freq = '30min'
for station, df in list(dfs.iteritems()):
    df = df.resample(freq).interpolate().dropna(axis=1)
    if 'OBS_DATA' in df:
        samples = DataFrame.from_dict(dict(std=df.std(),
                                           corr=df.corr()['OBS_DATA']))
    else:
        continue
    samples[samples < 0] = np.NaN
    samples.dropna(inplace=True)
    if len(samples) <= 2:  # 1 obs 1 model.
        continue
    fig = make_taylor(samples)
    fig.savefig(os.path.join(run_name, '{}.png'.format(station)))
    plt.close(fig)


c:\programs\anaconda-64\envs\swim\lib\site-packages\matplotlib\text.py:52: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  if rotation in ('horizontal', None):
c:\programs\anaconda-64\envs\swim\lib\site-packages\matplotlib\text.py:54: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  elif rotation == 'vertical':