In [1]:
from utils import crps_normal
In [2]:
mean = 1
sigma = 3
obs = 2
In [3]:
crps_normal(mean, sigma, obs)
Out[3]:
In [4]:
from scipy.stats import norm
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
In [6]:
import numpy as np
In [7]:
x = np.linspace(-10, 10, 100)
plt.plot(x, norm.pdf(x, mean, sigma))
plt.axvline(obs)
Out[7]:
In [8]:
plt.plot(x, norm.cdf(x, mean, sigma))
plt.axvline(obs)
Out[8]:
In [9]:
bins = np.arange(-10, 11)
bins
Out[9]:
In [10]:
from scipy.stats import binned_statistic
In [11]:
discrete_pdf = binned_statistic(x, norm.pdf(x, mean, sigma), bins=bins)[0]
discrete_pdf
Out[11]:
In [12]:
plt.bar(bins[:-1], discrete_pdf)
plt.axvline(obs)
Out[12]:
In [13]:
discrete_cdf = binned_statistic(x, norm.cdf(x, mean, sigma), bins=bins)[0]
discrete_cdf
Out[13]:
In [14]:
plt.bar(bins[:-1], discrete_cdf)
plt.bar(bins[:-1], np.asarray(bins[:-1] > obs, dtype='float'), color='g', zorder=0.1)
plt.axvline(obs)
plt.savefig('./test_plot')
In [15]:
np.cumsum(discrete_pdf), discrete_cdf
Out[15]:
In [16]:
discrete_cdf - np.asarray(bins[:-1] > obs, dtype='float')
Out[16]:
In [17]:
np.mean((discrete_cdf - np.asarray(bins[:-1] > obs, dtype='float'))**2)
Out[17]:
In [18]:
probs = np.array([0.14935065, 0.15097403, 0.04707792, 0.13149351, 0.10064935, 0.08116883,
0.11363636, 0.02110390, 0.09902597, 0.10551948])
In [19]:
probs.sum()
Out[19]:
In [20]:
obs = 4.577418
In [21]:
bin_edges = np.arange(0, 11, 1)
In [22]:
plt.bar(bin_edges[:-1] + 0.5, probs, width=1)
plt.axvline(obs, c='r')
plt.show()
In [23]:
cum_probs = np.cumsum(probs)
In [24]:
cum_obs = np.array([0, 1])
cum_obs_bin_edges = np.array([0, obs, 10])
In [25]:
plt.bar(bin_edges[:-1] + 0.5, cum_probs, width=1, alpha=0.5)
plt.bar(cum_obs_bin_edges[:-1], cum_obs, width=np.diff(cum_obs_bin_edges),
zorder =0.1)
Out[25]:
In [26]:
bin_obs = np.array((bin_edges[:-1] < obs) & (bin_edges[1:] > obs), dtype=int)
In [27]:
bin_obs
Out[27]:
In [28]:
plt.bar(bin_edges[:-1] + 0.5, probs, width=1)
plt.bar(bin_edges[:-1] + 0.5, bin_obs, width=1)
plt.axvline(obs, c='r')
plt.show()
In [29]:
rps = np.sum((probs - bin_obs) ** 2)
rps
Out[29]:
In [30]:
cum_bin_obs = np.cumsum(bin_obs)
In [31]:
plt.bar(bin_edges[:-1] + 0.5, cum_probs, width=1, alpha=0.8)
plt.bar(bin_edges[:-1] + 0.5, cum_bin_obs, width=1, zorder=0.1)
plt.axvline(obs, c='r')
plt.show()
In [32]:
approx_crps = np.sum((cum_probs - cum_bin_obs) ** 2)
In [33]:
approx_crps
Out[33]:
In [34]:
insert_idx = np.where(obs<bin_edges)[0][0]
In [35]:
new_bin_edges = np.insert(np.array(bin_edges, dtype=float), insert_idx, obs)
new_bin_edges
Out[35]:
In [36]:
new_probs = np.insert(probs, insert_idx, probs[insert_idx-1])
In [37]:
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_probs,
width=np.diff(new_bin_edges), linewidth=1, edgecolor='k')
plt.show()
In [38]:
new_bin_obs = np.array((new_bin_edges[:-1] <= obs) & (new_bin_edges[1:] > obs), dtype=int)
In [39]:
new_bin_edges[:-1], obs,(new_bin_edges[:-1] < obs)
Out[39]:
In [40]:
new_bin_obs
Out[40]:
In [41]:
new_cum_bin_obs = np.cumsum(new_bin_obs)
new_cum_probs = np.cumsum(new_probs * np.diff(new_bin_edges))
In [42]:
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_probs,
width=np.diff(new_bin_edges), linewidth=1, edgecolor='k', alpha=0.8)
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_bin_obs,
width=np.diff(new_bin_edges), linewidth=1, color='r', zorder=0.1)
plt.bar(bin_edges[:-1] + 0.5, cum_probs, width=1, alpha=0.8)
plt.axvline(obs, c='r')
plt.show()
In [43]:
crps = np.sum((((new_cum_probs - new_cum_bin_obs) * np.diff(new_bin_edges)) ** 2) )
In [44]:
crps
Out[44]:
In [45]:
np.sum(((new_cum_probs - new_cum_bin_obs) **2 * np.diff(new_bin_edges)**2))
Out[45]:
In [46]:
(cum_probs - cum_bin_obs) ** 2
Out[46]:
In [47]:
np.diff(new_bin_edges)
Out[47]:
In [51]:
a = np.concatenate(([0], new_cum_probs))
In [52]:
fix = (a[:-1] + a[1:]) / 2
In [53]:
plt.plot(new_bin_edges, np.concatenate(([0], new_cum_probs)))
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_probs,
width=np.diff(new_bin_edges), linewidth=1, edgecolor='k', alpha=0.8)
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), fix,
width=np.diff(new_bin_edges), linewidth=1, color='r', zorder=2)
plt.plot(new_bin_edges, np.concatenate(([0], new_cum_bin_obs)))
plt.axvline(obs, c='g')
Out[53]:
In [54]:
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), new_cum_probs,
width=np.diff(new_bin_edges), linewidth=1, edgecolor='k', alpha=0.8)
plt.bar(new_bin_edges[:-1] + 0.5 * np.diff(new_bin_edges), fix,
width=np.diff(new_bin_edges), linewidth=1, color='r', zorder=2)
plt.axvline(obs, c='r')
plt.show()
In [55]:
crps = np.sum((((fix - new_cum_bin_obs) ) ** 2) *np.diff(new_bin_edges))
In [56]:
crps
Out[56]:
In [57]:
# Test data
preds = np.stack([probs, probs])
In [58]:
preds.shape, preds # [sample, bin]
Out[58]:
In [95]:
targets = np.array([obs, 9.0000])
In [60]:
targets.shape, targets
Out[60]:
In [61]:
a = np.repeat(np.atleast_2d(bin_edges), targets.shape[0], axis=0)
a.shape
Out[61]:
In [62]:
a - targets
In [72]:
b = a - targets
b[b < 0] = 999
In [73]:
b
Out[73]:
In [76]:
insert_idx = np.argmin(b, axis=0)
In [77]:
insert_idx
Out[77]:
In [86]:
a
Out[86]:
In [91]:
new_bin_edges = np.insert(np.array(a, dtype=float), insert_idx, np.atleast_2d(targets), axis=1)
In [92]:
new_bin_edges
Out[92]:
In [97]:
c = np.array([np.insert(np.array(bin_edges, dtype=float), insert_idx[i], targets[i])
for i in range(targets.shape[0])])
In [98]:
c
Out[98]:
In [99]:
d = np.array([np.insert(preds[i], insert_idx[i], preds[i, insert_idx[i]-1])
for i in range(targets.shape[0])])
In [100]:
d
Out[100]:
In [107]:
new_bin_obs = np.array([(c[i, :-1] <= targets[i]) & (c[i, 1:] > targets[i])
for i in range(targets.shape[0])], dtype=int)
new_bin_obs
Out[107]:
In [114]:
d = d * np.diff(c, axis=1)
In [115]:
d
Out[115]:
In [116]:
new_cum_bin_obs = np.cumsum(new_bin_obs, axis=1)
new_cum_probs = np.cumsum(d, axis=1)
In [117]:
new_cum_bin_obs, new_cum_probs
Out[117]:
In [112]:
new_cum_probs = (new_cum_probs.T / new_cum_probs[:, -1]).T
new_cum_probs
Out[112]:
In [123]:
f = np.concatenate((np.zeros((new_cum_probs.shape[0], 1)), new_cum_probs), axis=1)
In [124]:
fix = (a[:-1] + a[1:]) / 2
Out[124]:
In [64]:
import pdb
def maybe_correct_crps(preds, targets, bin_edges):
#pdb.set_trace()
#Convert input arrays
preds = np.array(np.atleast_2d(preds), dtype='float')
targets = np.array(np.atleast_1d(targets), dtype='float')
# preds [sample, bins]
# Find insert index
pdb.set_trace
mat_bins = np.repeat(np.atleast_2d(bin_edges), targets.shape[0], axis=0)
b = mat_bins.T - targets
b[b < 0] = 999
insert_idxs = np.argmin(b, axis=0)
# Insert
ins_bin_edges = np.array([np.insert(np.array(bin_edges, dtype=float),
insert_idxs[i], targets[i])
for i in range(targets.shape[0])])
ins_preds = np.array([np.insert(preds[i], insert_idxs[i], preds[i, insert_idxs[i]-1])
for i in range(targets.shape[0])])
# Get obs
bin_obs = np.array([(ins_bin_edges[i, :-1] <= targets[i]) &
(ins_bin_edges[i, 1:] > targets[i])
for i in range(targets.shape[0])], dtype=int)
# Cumsum with weights
ins_preds *= np.diff(ins_bin_edges, axis=1)
cum_bin_obs = np.cumsum(bin_obs, axis=1)
cum_probs = np.cumsum(ins_preds, axis=1)
cum_probs = (cum_probs.T / cum_probs[:, -1]).T
# Get adjusted preds
adj_cum_probs = np.concatenate((np.zeros((cum_probs.shape[0], 1)), cum_probs), axis=1)
adj_cum_probs = (adj_cum_probs[:, :-1] + adj_cum_probs[:, 1:]) / 2
# Compute CRPS
crps = np.mean(np.sum(((adj_cum_probs - cum_bin_obs) ** 2) *
np.diff(ins_bin_edges, axis=1), axis=1))
return crps
In [94]:
maybe_correct_crps(preds, targets, bin_edges)
Out[94]:
In [102]:
def maybe_correct_crps2(preds, targets, bin_edges):
#pdb.set_trace()
#Convert input arrays
preds = np.array(np.atleast_2d(preds), dtype='float')
targets = np.array(np.atleast_1d(targets), dtype='float')
# preds [sample, bins]
# Find insert index
pdb.set_trace
mat_bins = np.repeat(np.atleast_2d(bin_edges), targets.shape[0], axis=0)
b = mat_bins.T - targets
b[b < 0] = 999
insert_idxs = np.argmin(b, axis=0)
# Insert
ins_bin_edges = np.array([np.insert(np.array(bin_edges, dtype=float),
insert_idxs[i], targets[i])
for i in range(targets.shape[0])])
ins_preds = np.array([np.insert(preds[i], insert_idxs[i], preds[i, insert_idxs[i]-1])
for i in range(targets.shape[0])])
# Get obs
bin_obs = np.array([(ins_bin_edges[i, :-1] <= targets[i]) &
(ins_bin_edges[i, 1:] > targets[i])
for i in range(targets.shape[0])], dtype=int)
# Cumsum with weights
ins_preds *= np.diff(ins_bin_edges, axis=1)
cum_bin_obs = np.cumsum(bin_obs, axis=1)
cum_probs = np.cumsum(ins_preds, axis=1)
cum_probs = (cum_probs.T / cum_probs[:, -1]).T
# Get adjusted preds
adj_cum_probs = np.concatenate((np.zeros((cum_probs.shape[0], 1)), cum_probs), axis=1)
print(adj_cum_probs.shape, cum_bin_obs.shape)
sq_list = []
for i in range(cum_bin_obs.shape[1]):
x_l = np.abs(cum_bin_obs[:, i] - adj_cum_probs[:, i])
x_r = np.abs(cum_bin_obs[:, i] - adj_cum_probs[:, i + 1])
sq = 1./3. * (x_l ** 2 + x_l * x_r + x_r ** 2)
if np.isnan(np.mean(sq)):
pdb.set_trace()
sq_list.append(sq)
crps = np.sum(np.array(sq_list).T * np.diff(ins_bin_edges, axis=1), axis=1)
return crps
In [103]:
maybe_correct_crps2(preds, targets, bin_edges)
Out[103]:
In [97]:
np.nanmax([np.nan, 1])
Out[97]:
In [ ]: