notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
# List the working directory to see which input files are available
# (bare `ls` relies on IPython's automagic/alias handling).
ls









    



Untitled.ipynb                         evaluate_predictions.py
emos_network_train_2015_pred_2016.csv  obs.csv



In [3]:

    
# Read the observations and the EMOS-network predictions from the CSV files
# sitting next to this notebook.
obs_path = 'obs.csv'
pred_path = 'emos_network_train_2015_pred_2016.csv'
obs_df = pd.read_csv(obs_path)
pred_df = pd.read_csv(pred_path)



In [5]:

    
# Preview the first rows of the observations frame.
obs_df.head()



In [6]:

    
# Preview the first rows of the predictions frame.
pred_df.head()



In [7]:

    
# Number of observation rows, for comparison with len(pred_df).
len(obs_df)









    Out[7]:





182218



In [8]:

    
# Number of prediction rows, for comparison with len(obs_df).
len(pred_df)









    Out[8]:





182218



In [14]:

    
# Put both frames in the same row order: by date first, then by station.
# The station column is called 'station' in obs_df and 'station_id' in pred_df.
obs_sort_keys = ['date', 'station']
pred_sort_keys = ['date', 'station_id']
obs_df = obs_df.sort_values(by=obs_sort_keys)
pred_df = pred_df.sort_values(by=pred_sort_keys)



In [16]:

    
# Element-wise check that both frames carry the same date in each row;
# the result is a boolean Series with one entry per row.
obs_df['date'] == pred_df['date']









    Out[16]:





0         True
1         True
2         True
3         True
4         True
5         True
6         True
7         True
8         True
9         True
10        True
11        True
12        True
13        True
14        True
15        True
16        True
17        True
18        True
19        True
20        True
21        True
22        True
23        True
24        True
25        True
26        True
27        True
28        True
29        True
          ... 
182188    True
182189    True
182190    True
182191    True
182192    True
182193    True
182194    True
182195    True
182196    True
182197    True
182198    True
182199    True
182200    True
182201    True
182202    True
182203    True
182204    True
182205    True
182206    True
182207    True
182208    True
182209    True
182210    True
182211    True
182212    True
182213    True
182214    True
182215    True
182216    True
182217    True
Name: date, Length: 182218, dtype: bool



In [17]:

    
# pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
# pandas.testing is the public home of the assertion helpers.
from pandas.testing import assert_frame_equal



In [21]:

    
# Single-boolean version of the date comparison: True only when the two
# date Series match exactly.
obs_df['date'].equals(pred_df['date'])









    Out[21]:





True



In [22]:

    
# Same exact-match check for the station identifiers
# ('station' in obs_df, 'station_id' in pred_df).
obs_df['station'].equals(pred_df['station_id'])









    Out[22]:





True



In [23]:

    
# Re-order the observations station-first (then by date); pred_df is left
# in its current order.
obs_df = obs_df.sort_values(by=['station', 'date'])



In [24]:

    
# Re-run the exact-match date check now that obs_df is sorted station-first.
obs_df['date'].equals(pred_df['date'])









    Out[24]:





False



In [25]:

    
# Put the observations back into date-first (then station) order.
obs_df = obs_df.sort_values(by=['date', 'station'])



In [28]:

    
from scipy.stats import norm
import numpy as np
def crps_normal(mu, sigma, y):
    """
    Closed-form CRPS of a Gaussian forecast evaluated at an observation.

    Parameters
    ----------
    mu : forecast mean (scalar or array-like supporting arithmetic).
    sigma : forecast standard deviation; used as a divisor, and no check
        is made here for zero or negative values.
    y : observed value(s).

    Returns
    -------
    The CRPS, computed element-wise when array-likes are passed.
    """
    # Standardised distance of the observation from the forecast mean.
    z = (y - mu) / sigma
    cdf_term = z * (2 * norm.cdf(z) - 1)
    pdf_term = 2 * norm.pdf(z)
    offset = 1. / np.sqrt(np.pi)
    return sigma * (cdf_term + pdf_term - offset)



In [30]:

    
# crps_normal combines the three Series element-wise, and pandas pairs the
# elements by index label rather than by the order sort_values produced.
# Make sure both frames describe the same (date, station) rows under the same
# index before trusting the score.
assert obs_df['date'].equals(pred_df['date']), 'obs/pred dates are not aligned'
assert obs_df['station'].equals(pred_df['station_id']), 'obs/pred stations are not aligned'

# Mean CRPS of the Gaussian forecasts over all (date, station) pairs.
np.mean(crps_normal(pred_df['mean'], pred_df['std'], obs_df['obs']))









    Out[30]:





1.011747631876825



In [2]:

    
# List the working directory again
# (bare `ls` relies on IPython's automagic/alias handling).
ls









    



crps.csv                               evaluate_predictions.py
dataframe_tests.ipynb                  obs.csv
emos_network_train_2015_pred_2016.csv



In [2]:

    
# Load the per-model CRPS summary; the first CSV column becomes the row index.
crps_path = 'crps.csv'
crps_df = pd.read_csv(crps_path, index_col=0)
crps_df









    Out[2]:







  
    
      
      crps
      name
    
  
  
    
      0
      1.156820
      raw_ensemble
    
    
      1
      1.020197
      emos_network_rolling_window
    
    
      2
      1.012283
      emos_network_train_2015_pred_2016
    
    
      3
      1.005431
      fc_network_rolling_window
    
    
      4
      1.011311
      fc_network_train_2015_pred_2016
    
    
      5
      1.014767
      hidden_nn_train_2015_pred_2016
    
    
      6
      0.912454
      embedding_nn_train_2015_pred_2016
    
    
      7
      0.936979
      hidden_nn_aux_train_2015_pred_2016
    
    
      8
      0.857939
      embedding_nn_aux_train_2015_pred_2016



In [5]:

    
%matplotlib inline
import matplotlib.pyplot as plt



In [10]:

    
# Horizontal bars: one per model name, bar length is the CRPS.
crps_df.plot(kind='barh', x='name', y='crps')









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x1167c3208>



In [10]:

    
# NOTE(review): opens a Qt console attached to this kernel, which is an
# interactive debugging aid; presumably not needed for the analysis itself,
# so consider removing it before sharing the notebook.
%qtconsole



In [14]:

    
# Reference score for the comparison: the CRPS of the raw ensemble.
# Select the row by model name rather than by row label 0, so the baseline
# stays correct if crps.csv is reordered or re-indexed.
ref = crps_df.loc[crps_df['name'] == 'raw_ensemble', 'crps'].iloc[0]



In [19]:

    
# Relative CRPS reduction versus the reference score `ref`, in percent
# (positive means a lower CRPS than the reference).
relative_gain = (ref - crps_df['crps']).div(ref)
crps_df['improvement %'] = relative_gain.mul(100)



In [20]:

    
# Display the table, now including the 'improvement %' column.
crps_df









    Out[20]:







  
    
      
      crps
      name
      improvement %
    
  
  
    
      0
      1.156820
      raw_ensemble
      0.000000
    
    
      1
      1.020197
      emos_network_rolling_window
      11.810252
    
    
      2
      1.012283
      emos_network_train_2015_pred_2016
      12.494324
    
    
      3
      1.005431
      fc_network_rolling_window
      13.086633
    
    
      4
      1.011311
      fc_network_train_2015_pred_2016
      12.578399
    
    
      5
      1.014767
      hidden_nn_train_2015_pred_2016
      12.279625
    
    
      6
      0.912454
      embedding_nn_train_2015_pred_2016
      21.123998
    
    
      7
      0.936979
      hidden_nn_aux_train_2015_pred_2016
      19.003944
    
    
      8
      0.857939
      embedding_nn_aux_train_2015_pred_2016
      25.836483



In [32]:

    
# Horizontal bars: one per model name, bar length is the percentage improvement.
crps_df.plot(kind='barh', x='name', y='improvement %')









    Out[32]:





<matplotlib.axes._subplots.AxesSubplot at 0x1183dd630>



In [33]:

    
import seaborn as sns



In [38]:

    
# Seaborn version of the improvement chart; the title reports the reference
# score `ref` that the percentages are measured against.
title_text = 'Raw ensemble CRPS: %.2f' % ref
sns.barplot(data=crps_df, x='improvement %', y='name', palette='cubehelix_r')
plt.title(title_text)









    Out[38]:





<matplotlib.text.Text at 0x11aca9470>



In [ ]:

	Unnamed: 0	date	obs	station
0	0	2016-01-01	4.3	44.0
1	1	2016-01-01	3.3	71.0
2	2	2016-01-01	-0.8	73.0
3	3	2016-01-01	3.2	78.0
4	4	2016-01-01	3.5	91.0

	Unnamed: 0	date	mean	station_id	std
0	0	2016-01-01	4.442505	44.0	1.651832
1	1	2016-01-01	1.637181	71.0	2.386799
2	2	2016-01-01	0.611089	73.0	1.772206
3	3	2016-01-01	4.397912	78.0	1.700036
4	4	2016-01-01	1.966224	91.0	2.544652

	crps	name
0	1.156820	raw_ensemble
1	1.020197	emos_network_rolling_window
2	1.012283	emos_network_train_2015_pred_2016
3	1.005431	fc_network_rolling_window
4	1.011311	fc_network_train_2015_pred_2016
5	1.014767	hidden_nn_train_2015_pred_2016
6	0.912454	embedding_nn_train_2015_pred_2016
7	0.936979	hidden_nn_aux_train_2015_pred_2016
8	0.857939	embedding_nn_aux_train_2015_pred_2016

	crps	name	improvement %
0	1.156820	raw_ensemble	0.000000
1	1.020197	emos_network_rolling_window	11.810252
2	1.012283	emos_network_train_2015_pred_2016	12.494324
3	1.005431	fc_network_rolling_window	13.086633
4	1.011311	fc_network_train_2015_pred_2016	12.578399
5	1.014767	hidden_nn_train_2015_pred_2016	12.279625
6	0.912454	embedding_nn_train_2015_pred_2016	21.123998
7	0.936979	hidden_nn_aux_train_2015_pred_2016	19.003944
8	0.857939	embedding_nn_aux_train_2015_pred_2016	25.836483

Table of Contents