In [1]:
import pandas as pd
In [2]:
ls
In [3]:
# Read the two CSV inputs from the working directory.
# `obs_df` supplies the `obs` column and `pred_df` the `mean` / `std` columns
# that the CRPS computation further down consumes.
obs_df = pd.read_csv('obs.csv')
pred_df = pd.read_csv('emos_network_train_2015_pred_2016.csv')
In [5]:
# Preview the first rows of obs_df.
obs_df.head()
Out[5]:
In [6]:
# Preview the first rows of pred_df.
pred_df.head()
Out[6]:
In [7]:
# Number of rows in obs_df.
len(obs_df)
Out[7]:
In [8]:
# Number of rows in pred_df.
len(pred_df)
Out[8]:
In [14]:
# Put both frames into the same (date, station) order so that row i of one
# frame corresponds to row i of the other.
# `sort_values` keeps the original row labels, and pandas aligns comparisons
# and arithmetic between Series on those labels rather than on position.
# Rebuilding the index as 0..n-1 after sorting makes the sorted order the one
# that later element-wise operations actually use.
obs_df = obs_df.sort_values(['date', 'station']).reset_index(drop=True)
pred_df = pred_df.sort_values(['date', 'station_id']).reset_index(drop=True)
In [16]:
# Element-wise comparison of the two `date` columns; displays the full
# boolean Series.
# NOTE(review): pandas compares Series by index label, so this only checks
# row order when both frames carry the same index -- confirm.
obs_df['date'] == pred_df['date']
Out[16]:
In [17]:
# `pandas.util.testing` was deprecated in pandas 1.0 and removed in 2.0;
# `pandas.testing` is the supported public home of the same helper.
from pandas.testing import assert_frame_equal
In [21]:
# True/False: do the `date` columns of the two frames match as whole Series?
obs_df['date'].equals(pred_df['date'])
Out[21]:
In [22]:
# True/False: does `station` in obs_df match `station_id` in pred_df?
obs_df['station'].equals(pred_df['station_id'])
Out[22]:
In [23]:
# Re-sort obs_df with station as the primary key and date as the secondary key
# (pred_df is left untouched).
# NOTE(review): presumably a sanity check that the equality test in the next
# cell notices a different row order -- confirm.
obs_df = obs_df.sort_values(['station', 'date'])
In [24]:
# Repeat the `date` equality check after obs_df was re-sorted by station.
obs_df['date'].equals(pred_df['date'])
Out[24]:
In [25]:
# Restore the (date, station) ordering of obs_df after the station-first sort.
# NOTE(review): the row labels still travel with the rows; pandas arithmetic
# aligns on those labels, not on the displayed order -- verify downstream use.
obs_df = obs_df.sort_values(['date', 'station'])
In [28]:
from scipy.stats import norm
import numpy as np
def crps_normal(mu, sigma, y):
    """
    Closed-form CRPS of a Gaussian forecast N(mu, sigma**2) against y.

    With z = (y - mu) / sigma, the value returned is

        sigma * (z * (2 * Phi(z) - 1) + 2 * phi(z) - 1 / sqrt(pi))

    where Phi and phi are the standard normal CDF and PDF.

    Parameters
    ----------
    mu : forecast mean (scalar or array-like).
    sigma : forecast standard deviation; used as a divisor, so it must be
        non-zero.
    y : observed value(s).

    Returns
    -------
    The CRPS, with whatever shape the inputs broadcast to.
    """
    # Standardised forecast error.
    z = (y - mu) / sigma
    # 2 * Phi(z) - 1, the CDF term of the closed form.
    centred_cdf = 2 * norm.cdf(z) - 1
    bracket = z * centred_cdf + 2 * norm.pdf(z) - 1. / np.sqrt(np.pi)
    return sigma * bracket
In [30]:
# Mean CRPS over all forecast/observation pairs.
# The observations are passed as a plain array so they are paired with the
# predictions by row position (both frames were sorted by date and station
# earlier) instead of by pandas index label; label alignment would silently
# undo the sort. `mean` and `std` stay Series from the same frame, so the
# result is still a Series and the mean is computed as before.
np.mean(crps_normal(pred_df['mean'], pred_df['std'], obs_df['obs'].values))
Out[30]:
In [2]:
ls
In [2]:
# Load the CRPS summary table; the first CSV column becomes the index.
# The table provides the `name` and `crps` columns used by the cells below.
crps_df = pd.read_csv('crps.csv', index_col=0)
crps_df
Out[2]:
In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
In [10]:
# Horizontal bar chart: one bar per `name`, bar length = `crps`.
crps_df.plot(kind='barh', x='name', y='crps')
Out[10]:
In [10]:
%qtconsole
In [14]:
# Reference score: the CRPS in the first row of the table (the plot title
# further down labels it as the raw-ensemble CRPS).
# `.iloc[0]` selects by position explicitly; a bare `[0]` is a label lookup on
# an integer index and a deprecated positional fallback otherwise.
ref = crps_df['crps'].iloc[0]
In [19]:
# Relative CRPS reduction with respect to `ref`, expressed in percent.
crps_df['improvement %'] = (ref - crps_df['crps']).div(ref).mul(100)
In [20]:
# Display the table, now including the 'improvement %' column.
crps_df
Out[20]:
In [32]:
# Horizontal bar chart: one bar per `name`, bar length = 'improvement %'.
crps_df.plot(kind='barh', x='name', y='improvement %')
Out[32]:
In [33]:
import seaborn as sns
In [38]:
# Seaborn version of the improvement chart; the title reports the reference
# CRPS. The title is set directly on the Axes that `barplot` returns.
sns.barplot(
    data=crps_df,
    x='improvement %',
    y='name',
    palette='cubehelix_r',
).set_title('Raw ensemble CRPS: %.2f' % ref)
Out[38]:
In [ ]: