In [106]:
import datetime
print(datetime.datetime.now().isoformat())
Using a statistical model of the relationship between the slot filling at 464keV vs Dst.
We model the number of slot filling events as a random sample from a binomial distribution meaning that there is a probability of the event occuring and there may be a parameter that changes the probability. This is not meant as a correlation but as a success failure creiteria.
From Wikipedia: In probability theory and statistics, the binomial distribution with parameters n and p is the discrete probability distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields success with probability p.
$ change \sim Bin(n,p) \\ logit(p) = \alpha + \beta x \\ a \sim N(0,5) \\ \beta \sim N(0,10) $
where we set vague priors for $\alpha$ and $\beta$, the parameters for the logistic model.
This is the same technique used in the estimation of deaths due to a concentration of a chemical.
In [107]:
# http://onlinelibrary.wiley.com/doi/10.1002/2016JA022652/epdf
import pymc
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt
import spacepy.plot as spp
Data delivered by Geoff Reeves 9/6/2016
In [108]:
# min_Dst, min_L
data = np.asarray([
65.000, 3.8000,
50.000, 3.7000,
67.000, 3.5000,
61.000, 3.4000,
77.000, 3.2000,
99.000, 2.8900,
87.000, 2.8000,
98.000, 2.8000,
96.000, 2.8000,
93.000, 2.3000,
92.000, 2.3000,
225.00, 2.3000,
206.00, 2.3000,
125.00, 2.3000]).reshape((-1,2))
dst = data[:,0]
minL = data[:,1]
print(dst, minL, data.dtype)
In [110]:
# make bins in Dst
dst_bins = np.arange(25, 300, 50)
print(dst_bins)
dst_bins_centers = np.asarray([dst_bins[:-1] + np.diff(dst_bins)/2]).T[:,0]
print(dst_bins_centers, dst_bins_centers.shape)
n_events_dig = np.digitize(dst, dst_bins)
print(n_events_dig)
n_events = np.zeros(len(dst_bins)-1)
success = np.zeros_like(n_events)
for i, v in enumerate(np.unique(n_events_dig)):
n_events[v-1] = np.sum(n_events_dig==v)
success[v-1] = np.sum(minL[n_events_dig==v] <= 2.4)
print(n_events)
print(success)
In [111]:
plt.plot(dst, minL, 'o')
plt.xlim((25, 250))
plt.ylim((2.2, 4.0))
plt.xlabel('Min Dst')
plt.ylabel('Min L Shell')
Out[111]:
In [112]:
plt.plot(dst, minL, 'o')
plt.xlim((25, 250))
plt.ylim((2.2, 4.0))
plt.xlabel('Min Dst')
plt.ylabel('Min L Shell')
for v in dst_bins:
plt.axvline(v, lw=0.5)
plt.axhline(2.4, c='r', lw=1)
Out[112]:
In [116]:
# define invlogit function
def invlogit(x):
return pymc.exp(x) / (1 + pymc.exp(x))
Setup the Bayesian model accorind to the description above. Run the Markov chain monte carlo (MCMC) to sample the posterior distributions for $\alpha$ and $\beta$
In [117]:
# define priors
# these are wide uninformative priors
# alpha = pymc.Normal('alpha', mu=0, tau=1.0/5**2)
# beta = pymc.Normal('beta', mu=0, tau=1.0/10**2)
alpha = pymc.Uniform('alpha', -500, 100, value = 1e-2)
beta = pymc.Uniform('beta', -100, 100, value = 1e-2)
# cannot feed in zero events
ind = n_events > 0
print(n_events.shape, dst_bins_centers.shape, success.shape, )
# define likelihood
p = pymc.InvLogit('p', alpha + beta*dst_bins_centers[ind])
print('n_events', n_events[ind], 'p', p.value, 'success', success[ind], )
y = pymc.Binomial('y_obs', n=n_events[ind], p=p, value=success[ind], observed=True)
In [118]:
# inference
m = pymc.Model([alpha, beta, y])
mc = pymc.MCMC(m)
mc.sample(iter=500000, burn=1000, burn_till_tuned=True, thin=300)
Make diagnostic plots of the posteriour distributions as created using MCMC.
In [119]:
pymc.Matplot.plot(mc)
In [120]:
pprint(mc.stats())
In [121]:
xp = np.linspace(dst_bins_centers[ind].min(), dst_bins_centers[ind].max(), 100)
a = alpha.stats()['mean']
b = beta.stats()['mean']
y_val = invlogit(a + b*xp).value
plt.plot(xp, y_val)
plt.scatter(dst_bins_centers[ind], success[ind]/n_events[ind], s=50);
plt.xlabel('Minimum Dst')
plt.ylabel('Probability slot is filled')
plt.gca().ticklabel_format(useOffset=False)
In [122]:
# get the minimum Dst where 99% should be successes
ind99 = y_val >= 0.99
minDst99 = xp[ind99][0]
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 99% percent of a slot filling'.format(minDst99))
In [123]:
# one should be able to get estimates of the line uncertainity
ilu = np.empty((1000, len(xp)), dtype=float)
for ii, v in enumerate(np.random.random_integers(0, len(alpha.trace[:])-1, 1000)):
ilu[ii] = invlogit(alpha.trace[:][v] + beta.trace[:][v]*xp).value
In [124]:
xp = np.linspace(dst_bins_centers[ind].min(), dst_bins_centers[ind].max(), 100)
for v in ilu:
plt.plot(xp, v, alpha=.01, c='r')
a = alpha.stats()['mean']
b = beta.stats()['mean']
plt.plot(xp, invlogit(a + b*xp).value)
a = alpha.stats()['quantiles'][50]
b = beta.stats()['quantiles'][50]
plt.plot(xp, invlogit(a + b*xp).value, c='y')
plt.scatter(dst_bins_centers[ind], success[ind]/n_events[ind], s=50);
plt.xlabel('Minimum Dst')
plt.ylabel('Probability slot is filled')
Out[124]:
Same as previous figure with the red lines overlayed as 100 joint draws from the model posterior in order to show spread.
In [ ]:
In [ ]:
In [ ]:
In [ ]: