In [1]:
from utils import crps_normal


Using TensorFlow backend.
Anaconda environment: py36_gpu
Linux 4.4.0-101-generic

In [2]:
mean = 1
sigma = 3
obs = 2

In [3]:
crps_normal(mean, sigma, obs)


Out[3]:
0.83284793515116295

In [4]:
from scipy.stats import norm

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
import numpy as np

In [7]:
x = np.linspace(-10, 10, 100)
plt.plot(x, norm.pdf(x, mean, sigma))
plt.axvline(obs)


Out[7]:
<matplotlib.lines.Line2D at 0x7fc9cd9060b8>

In [8]:
plt.plot(x, norm.cdf(x, mean, sigma))
plt.axvline(obs)


Out[8]:
<matplotlib.lines.Line2D at 0x7fc9cd8e7f28>

In [9]:
bins = np.arange(-10, 11)
bins


Out[9]:
array([-10,  -9,  -8,  -7,  -6,  -5,  -4,  -3,  -2,  -1,   0,   1,   2,
         3,   4,   5,   6,   7,   8,   9,  10])

In [10]:
from scipy.stats import binned_statistic

In [11]:
discrete_pdf = binned_statistic(x, norm.pdf(x, mean, sigma), bins=bins)[0]
discrete_pdf


Out[11]:
array([ 0.00027361,  0.00084067,  0.00230843,  0.00566494,  0.01242418,
        0.02435222,  0.04265903,  0.06678636,  0.0934483 ,  0.11685937,
        0.13060667,  0.13046003,  0.11646618,  0.09292485,  0.0662632 ,
        0.04222987,  0.02405312,  0.01224403,  0.00557026,  0.00226476])

In [12]:
plt.bar(bins[:-1], discrete_pdf)
plt.axvline(obs)


Out[12]:
<matplotlib.lines.Line2D at 0x7fc989a01630>

In [13]:
discrete_cdf = binned_statistic(x, norm.cdf(x, mean, sigma), bins=bins)[0]
discrete_cdf


Out[13]:
array([  2.18868160e-04,   7.33855671e-04,   2.21518074e-03,
         6.02693689e-03,   1.48015893e-02,   3.28722022e-02,
         6.61658285e-02,   1.21044127e-01,   2.01971698e-01,
         3.08742533e-01,   4.34771928e-01,   5.67865119e-01,
         6.93614298e-01,   7.99910861e-01,   8.80299806e-01,
         9.34691631e-01,   9.67616736e-01,   9.85447581e-01,
         9.94086548e-01,   9.97831012e-01])

In [14]:
plt.bar(bins[:-1], discrete_cdf)
plt.bar(bins[:-1], np.asarray(bins[:-1] > obs, dtype='float'), color='g', zorder=0.1)
plt.axvline(obs)
plt.savefig('./test_plot')



In [15]:
np.cumsum(discrete_pdf), discrete_cdf


Out[15]:
(array([  2.73605201e-04,   1.11428006e-03,   3.42270989e-03,
          9.08764703e-03,   2.15118273e-02,   4.58640472e-02,
          8.85230742e-02,   1.55309436e-01,   2.48757734e-01,
          3.65617100e-01,   4.96223769e-01,   6.26683795e-01,
          7.43149975e-01,   8.36074828e-01,   9.02338032e-01,
          9.44567898e-01,   9.68621016e-01,   9.80865051e-01,
          9.86435315e-01,   9.88700070e-01]),
 array([  2.18868160e-04,   7.33855671e-04,   2.21518074e-03,
          6.02693689e-03,   1.48015893e-02,   3.28722022e-02,
          6.61658285e-02,   1.21044127e-01,   2.01971698e-01,
          3.08742533e-01,   4.34771928e-01,   5.67865119e-01,
          6.93614298e-01,   7.99910861e-01,   8.80299806e-01,
          9.34691631e-01,   9.67616736e-01,   9.85447581e-01,
          9.94086548e-01,   9.97831012e-01]))

In [16]:
discrete_cdf - np.asarray(bins[:-1] > obs, dtype='float')


Out[16]:
array([  2.18868160e-04,   7.33855671e-04,   2.21518074e-03,
         6.02693689e-03,   1.48015893e-02,   3.28722022e-02,
         6.61658285e-02,   1.21044127e-01,   2.01971698e-01,
         3.08742533e-01,   4.34771928e-01,   5.67865119e-01,
         6.93614298e-01,  -2.00089139e-01,  -1.19700194e-01,
        -6.53083690e-02,  -3.23832638e-02,  -1.45524189e-02,
        -5.91345226e-03,  -2.16898819e-03])

In [17]:
np.mean((discrete_cdf - np.asarray(bins[:-1] > obs, dtype='float'))**2)


Out[17]:
0.060450646164963498

Sebastians example


In [18]:
probs = np.array([0.14935065, 0.15097403, 0.04707792, 0.13149351, 0.10064935, 0.08116883,
                  0.11363636, 0.02110390, 0.09902597, 0.10551948])

In [19]:
probs.sum()


Out[19]:
1.0

In [20]:
obs = 4.577418

In [21]:
bin_edges = np.arange(0, 11, 1)

In [22]:
plt.bar(bin_edges[:-1] + 0.5, probs, width=1)
plt.axvline(obs, c='r')
plt.show()



In [23]:
cum_probs = np.cumsum(probs)

In [24]:
cum_obs = np.array([0, 1])
cum_obs_bin_edges = np.array([0, obs, 10])

In [25]:
plt.bar(bin_edges[:-1] + 0.5, cum_probs, width=1, alpha=0.5)
plt.bar(cum_obs_bin_edges[:-1], cum_obs, width=np.diff(cum_obs_bin_edges), 
       zorder =0.1)


Out[25]:
<Container object of 2 artists>

In [26]:
bin_obs = np.array((bin_edges[:-1] < obs) & (bin_edges[1:] > obs), dtype=int)

In [27]:
bin_obs


Out[27]:
array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])

In [28]:
plt.bar(bin_edges[:-1] + 0.5, probs, width=1)
plt.bar(bin_edges[:-1] + 0.5, bin_obs, width=1)
plt.axvline(obs, c='r')
plt.show()



In [29]:
rps = np.sum((probs - bin_obs) ** 2)
rps


Out[29]:
0.91432471903567214

In [30]:
cum_bin_obs = np.cumsum(bin_obs)

In [31]:
plt.bar(bin_edges[:-1] + 0.5, cum_probs, width=1, alpha=0.8)
plt.bar(bin_edges[:-1] + 0.5, cum_bin_obs, width=1, zorder=0.1)
plt.axvline(obs, c='r')
plt.show()



In [32]:
approx_crps = np.sum((cum_probs - cum_bin_obs) ** 2)

In [33]:
approx_crps


Out[33]:
0.85831822487982823

In [34]:
insert_idx = np.where(obs<bin_edges)[0][0]

In [35]:
new_bin_edges = np.insert(np.array(bin_edges, dtype=float), insert_idx, obs)
new_bin_edges


Out[35]:
array([  0.      ,   1.      ,   2.      ,   3.      ,   4.      ,
         4.577418,   5.      ,   6.      ,   7.      ,   8.      ,
         9.      ,  10.      ])

In [36]:
new_probs = np.insert(probs, insert_idx, probs[insert_idx-1])

In [37]:
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_probs, 
        width=np.diff(new_bin_edges), linewidth=1, edgecolor='k')
plt.show()



In [38]:
new_bin_obs = np.array((new_bin_edges[:-1] <= obs) & (new_bin_edges[1:] > obs), dtype=int)

In [39]:
new_bin_edges[:-1], obs,(new_bin_edges[:-1] < obs)


Out[39]:
(array([ 0.      ,  1.      ,  2.      ,  3.      ,  4.      ,  4.577418,
         5.      ,  6.      ,  7.      ,  8.      ,  9.      ]),
 4.577418,
 array([ True,  True,  True,  True,  True, False, False, False, False,
        False, False], dtype=bool))

In [40]:
new_bin_obs


Out[40]:
array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])

In [41]:
new_cum_bin_obs = np.cumsum(new_bin_obs)
new_cum_probs = np.cumsum(new_probs * np.diff(new_bin_edges))

In [42]:
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_probs, 
        width=np.diff(new_bin_edges), linewidth=1, edgecolor='k', alpha=0.8)
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_bin_obs, 
        width=np.diff(new_bin_edges), linewidth=1, color='r', zorder=0.1)
plt.bar(bin_edges[:-1] + 0.5, cum_probs, width=1, alpha=0.8)
plt.axvline(obs, c='r')
plt.show()



In [43]:
crps = np.sum((((new_cum_probs - new_cum_bin_obs) * np.diff(new_bin_edges)) ** 2) )

In [44]:
crps


Out[44]:
0.80925530861832939

In [45]:
np.sum(((new_cum_probs - new_cum_bin_obs) **2 * np.diff(new_bin_edges)**2))


Out[45]:
0.80925530861832939

In [46]:
(cum_probs - cum_bin_obs) ** 2


Out[46]:
array([  2.23056167e-02,   9.01949134e-02,   1.20688566e-01,
         2.29341484e-01,   1.76782020e-01,   1.15114793e-01,
         5.09176292e-02,   4.18388411e-02,   1.11343607e-02,
         1.23259516e-32])

In [47]:
np.diff(new_bin_edges)


Out[47]:
array([ 1.      ,  1.      ,  1.      ,  1.      ,  0.577418,  0.422582,
        1.      ,  1.      ,  1.      ,  1.      ,  1.      ])

In [51]:
a = np.concatenate(([0], new_cum_probs))

In [52]:
fix = (a[:-1] + a[1:]) / 2

In [53]:
plt.plot(new_bin_edges, np.concatenate(([0], new_cum_probs)))
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_probs, 
        width=np.diff(new_bin_edges), linewidth=1, edgecolor='k', alpha=0.8)
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), fix, 
        width=np.diff(new_bin_edges), linewidth=1, color='r', zorder=2)
plt.plot(new_bin_edges, np.concatenate(([0], new_cum_bin_obs)))
plt.axvline(obs, c='g')


Out[53]:
<matplotlib.lines.Line2D at 0x7fc984a53748>

In [54]:
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_probs, 
        width=np.diff(new_bin_edges), linewidth=1, edgecolor='k', alpha=0.8)
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), fix, 
        width=np.diff(new_bin_edges), linewidth=1, color='r', zorder=2)
plt.axvline(obs, c='r')
plt.show()



In [55]:
crps = np.sum((((fix - new_cum_bin_obs)  ) ** 2) *np.diff(new_bin_edges))

In [56]:
crps


Out[56]:
0.86032034780350819

Function for real(?) CRPS


In [57]:
# Test data
preds = np.stack([probs, probs])

In [58]:
preds.shape, preds   # [sample, bin]


Out[58]:
((2, 10),
 array([[ 0.14935065,  0.15097403,  0.04707792,  0.13149351,  0.10064935,
          0.08116883,  0.11363636,  0.0211039 ,  0.09902597,  0.10551948],
        [ 0.14935065,  0.15097403,  0.04707792,  0.13149351,  0.10064935,
          0.08116883,  0.11363636,  0.0211039 ,  0.09902597,  0.10551948]]))

In [95]:
targets = np.array([obs, 9.0000])

In [60]:
targets.shape, targets


Out[60]:
((2,), array([ 4.577418,  9.      ]))

In [61]:
a = np.repeat(np.atleast_2d(bin_edges), targets.shape[0], axis=0)
a.shape


Out[61]:
(2, 11)

In [62]:
a - targets


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-62-be23cdf59844> in <module>()
----> 1 a - targets

ValueError: operands could not be broadcast together with shapes (2,11) (2,) 

In [72]:
b = a - targets
b[b < 0] = 999

In [73]:
b


Out[73]:
array([[  9.99000000e+02,   9.99000000e+02],
       [  9.99000000e+02,   9.99000000e+02],
       [  9.99000000e+02,   9.99000000e+02],
       [  9.99000000e+02,   9.99000000e+02],
       [  9.99000000e+02,   9.99000000e+02],
       [  4.22582000e-01,   9.99000000e+02],
       [  1.42258200e+00,   9.99000000e+02],
       [  2.42258200e+00,   9.99000000e+02],
       [  3.42258200e+00,   9.99000000e+02],
       [  4.42258200e+00,   9.99000000e+02],
       [  5.42258200e+00,   9.00000000e-01]])

In [76]:
insert_idx = np.argmin(b, axis=0)

In [77]:
insert_idx


Out[77]:
array([ 5, 10])

In [86]:
a


Out[86]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])

In [91]:
new_bin_edges = np.insert(np.array(a, dtype=float), insert_idx, np.atleast_2d(targets), axis=1)

In [92]:
new_bin_edges


Out[92]:
array([[  0.      ,   1.      ,   2.      ,   3.      ,   4.      ,
          4.577418,   5.      ,   6.      ,   7.      ,   8.      ,
          9.      ,   9.1     ,  10.      ],
       [  0.      ,   1.      ,   2.      ,   3.      ,   4.      ,
          4.577418,   5.      ,   6.      ,   7.      ,   8.      ,
          9.      ,   9.1     ,  10.      ]])

In [97]:
c = np.array([np.insert(np.array(bin_edges, dtype=float), insert_idx[i], targets[i]) 
              for i in range(targets.shape[0])])

In [98]:
c


Out[98]:
array([[  0.      ,   1.      ,   2.      ,   3.      ,   4.      ,
          4.577418,   5.      ,   6.      ,   7.      ,   8.      ,
          9.      ,  10.      ],
       [  0.      ,   1.      ,   2.      ,   3.      ,   4.      ,
          5.      ,   6.      ,   7.      ,   8.      ,   9.      ,
          9.1     ,  10.      ]])

In [99]:
d = np.array([np.insert(preds[i], insert_idx[i], preds[i, insert_idx[i]-1]) 
              for i in range(targets.shape[0])])

In [100]:
d


Out[100]:
array([[ 0.14935065,  0.15097403,  0.04707792,  0.13149351,  0.10064935,
         0.10064935,  0.08116883,  0.11363636,  0.0211039 ,  0.09902597,
         0.10551948],
       [ 0.14935065,  0.15097403,  0.04707792,  0.13149351,  0.10064935,
         0.08116883,  0.11363636,  0.0211039 ,  0.09902597,  0.10551948,
         0.10551948]])

In [107]:
new_bin_obs = np.array([(c[i, :-1] <= targets[i]) & (c[i, 1:] > targets[i]) 
              for i in range(targets.shape[0])], dtype=int)
new_bin_obs


Out[107]:
array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [114]:
d = d * np.diff(c, axis=1)

In [115]:
d


Out[115]:
array([[ 0.14935065,  0.15097403,  0.04707792,  0.13149351,  0.05811675,
         0.0425326 ,  0.08116883,  0.11363636,  0.0211039 ,  0.09902597,
         0.10551948],
       [ 0.14935065,  0.15097403,  0.04707792,  0.13149351,  0.10064935,
         0.08116883,  0.11363636,  0.0211039 ,  0.09902597,  0.01055195,
         0.09496753]])

In [116]:
new_cum_bin_obs = np.cumsum(new_bin_obs, axis=1)
new_cum_probs = np.cumsum(d, axis=1)

In [117]:
new_cum_bin_obs, new_cum_probs


Out[117]:
(array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]),
 array([[ 0.14935065,  0.30032468,  0.3474026 ,  0.47889611,  0.53701286,
          0.57954546,  0.66071429,  0.77435065,  0.79545455,  0.89448052,
          1.        ],
        [ 0.14935065,  0.30032468,  0.3474026 ,  0.47889611,  0.57954546,
          0.66071429,  0.77435065,  0.79545455,  0.89448052,  0.90503247,
          1.        ]]))

In [112]:
new_cum_probs = (new_cum_probs.T / new_cum_probs[:, -1]).T
new_cum_probs


Out[112]:
array([[ 0.13569322,  0.27286136,  0.31563422,  0.43510325,  0.52654868,
         0.61799411,  0.69174042,  0.79498525,  0.8141593 ,  0.90412979,
         1.        ],
       [ 0.13509545,  0.27165933,  0.31424376,  0.4331865 ,  0.52422908,
         0.59765052,  0.70044053,  0.71953011,  0.80910426,  0.90455213,
         1.        ]])

In [123]:
f = np.concatenate((np.zeros((new_cum_probs.shape[0], 1)), new_cum_probs), axis=1)

In [124]:
fix = (a[:-1] + a[1:]) / 2


Out[124]:
array([[ 0.        ,  0.14935065,  0.30032468,  0.3474026 ,  0.47889611,
         0.53701286,  0.57954546,  0.66071429,  0.77435065,  0.79545455,
         0.89448052,  1.        ],
       [ 0.        ,  0.14935065,  0.30032468,  0.3474026 ,  0.47889611,
         0.57954546,  0.66071429,  0.77435065,  0.79545455,  0.89448052,
         0.90503247,  1.        ]])

In [64]:
import pdb
def maybe_correct_crps(preds, targets, bin_edges):
    #pdb.set_trace()
    #Convert input arrays
    preds = np.array(np.atleast_2d(preds), dtype='float')
    targets = np.array(np.atleast_1d(targets), dtype='float')
    
    # preds [sample, bins]
    # Find insert index
    pdb.set_trace
    mat_bins = np.repeat(np.atleast_2d(bin_edges), targets.shape[0], axis=0)
    b = mat_bins.T - targets
    b[b < 0] = 999
    insert_idxs = np.argmin(b, axis=0)
    
    # Insert 
    ins_bin_edges = np.array([np.insert(np.array(bin_edges, dtype=float), 
                                        insert_idxs[i], targets[i]) 
                              for i in range(targets.shape[0])])
    ins_preds = np.array([np.insert(preds[i], insert_idxs[i], preds[i, insert_idxs[i]-1]) 
                          for i in range(targets.shape[0])])
    
    # Get obs
    bin_obs = np.array([(ins_bin_edges[i, :-1] <= targets[i]) & 
                        (ins_bin_edges[i, 1:] > targets[i]) 
                        for i in range(targets.shape[0])], dtype=int)
    
    # Cumsum with weights
    ins_preds *= np.diff(ins_bin_edges, axis=1)
    cum_bin_obs = np.cumsum(bin_obs, axis=1)
    cum_probs = np.cumsum(ins_preds, axis=1)
    cum_probs = (cum_probs.T / cum_probs[:, -1]).T
    
    # Get adjusted preds
    adj_cum_probs = np.concatenate((np.zeros((cum_probs.shape[0], 1)), cum_probs), axis=1)
    adj_cum_probs = (adj_cum_probs[:, :-1] + adj_cum_probs[:, 1:]) / 2
    
    # Compute CRPS
    crps = np.mean(np.sum(((adj_cum_probs - cum_bin_obs) ** 2) * 
                          np.diff(ins_bin_edges, axis=1), axis=1))
    return crps

In [94]:
maybe_correct_crps(preds, targets, bin_edges)


Out[94]:
1.8521755276904965

In [102]:
def maybe_correct_crps2(preds, targets, bin_edges):
    #pdb.set_trace()
    #Convert input arrays
    preds = np.array(np.atleast_2d(preds), dtype='float')
    targets = np.array(np.atleast_1d(targets), dtype='float')
    
    # preds [sample, bins]
    # Find insert index
    pdb.set_trace
    mat_bins = np.repeat(np.atleast_2d(bin_edges), targets.shape[0], axis=0)
    b = mat_bins.T - targets
    b[b < 0] = 999
    insert_idxs = np.argmin(b, axis=0)
    
    # Insert 
    ins_bin_edges = np.array([np.insert(np.array(bin_edges, dtype=float), 
                                        insert_idxs[i], targets[i]) 
                              for i in range(targets.shape[0])])
    ins_preds = np.array([np.insert(preds[i], insert_idxs[i], preds[i, insert_idxs[i]-1]) 
                          for i in range(targets.shape[0])])
    
    # Get obs
    bin_obs = np.array([(ins_bin_edges[i, :-1] <= targets[i]) & 
                        (ins_bin_edges[i, 1:] > targets[i]) 
                        for i in range(targets.shape[0])], dtype=int)
    
    # Cumsum with weights
    ins_preds *= np.diff(ins_bin_edges, axis=1)
    cum_bin_obs = np.cumsum(bin_obs, axis=1)
    cum_probs = np.cumsum(ins_preds, axis=1)
    cum_probs = (cum_probs.T / cum_probs[:, -1]).T
    
    # Get adjusted preds
    adj_cum_probs = np.concatenate((np.zeros((cum_probs.shape[0], 1)), cum_probs), axis=1)
    print(adj_cum_probs.shape, cum_bin_obs.shape)
    sq_list = []
    
    for i in range(cum_bin_obs.shape[1]):
        x_l = np.abs(cum_bin_obs[:, i] - adj_cum_probs[:, i])
        x_r = np.abs(cum_bin_obs[:, i] - adj_cum_probs[:, i + 1])
        sq = 1./3. * (x_l ** 2 + x_l * x_r + x_r ** 2)
        if np.isnan(np.mean(sq)):
            pdb.set_trace()
        sq_list.append(sq)
    crps = np.sum(np.array(sq_list).T * np.diff(ins_bin_edges, axis=1), axis=1)
    return crps

In [103]:
maybe_correct_crps2(preds, targets, bin_edges)


(2, 12) (2, 11)
Out[103]:
array([ 0.86933767,  2.85365808])

In [97]:
np.nanmax([np.nan, 1])


Out[97]:
1.0

In [ ]: