In [1]:
import datetime
print(datetime.datetime.now().isoformat())
Using a statistical model of the relationship between the slot filling at 464keV vs Dst.
We model the number of slot filling events as a random sample from a binomial distribution meaning that there is a probability of the event occuring and there may be a parameter that changes the probability. This is not meant as a correlation but as a success failure creiteria.
From Wikipedia: In probability theory and statistics, the binomial distribution with parameters n and p is the discrete probability distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields success with probability p.
$ change \sim Bin(n,p) \\ logit(p) = \alpha + \beta x \\ a \sim N(0,5) \\ \beta \sim N(0,10) $
where we set vague priors for $\alpha$ and $\beta$, the parameters for the logistic model.
This is the same technique used in the estimation of deaths due to a concentration of a chemical.
In [5]:
# http://onlinelibrary.wiley.com/doi/10.1002/2016JA022652/epdf
import pymc3
import tqdm
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt
import spacepy.plot as spp
import seaborn as sns
sns.set(font_scale=1.5)
Data delivered by Geoff Reeves 9/6/2016
In [6]:
# min_Dst, min_L
data = np.asarray([
65.000, 3.8000,
50.000, 3.7000,
67.000, 3.5000,
61.000, 3.4000,
77.000, 3.2000,
99.000, 2.8900,
87.000, 2.8000,
98.000, 2.8000,
96.000, 2.8000,
93.000, 2.3000,
92.000, 2.3000,
225.00, 2.3000,
206.00, 2.3000,
125.00, 2.3000]).reshape((-1,2))
dst = data[:,0]
minL = data[:,1]
print(dst, minL, data.dtype)
In [39]:
# make bins in Dst
dst_bins = np.arange(25, 300, 10)
print(dst_bins)
dst_bins_centers = np.asarray([dst_bins[:-1] + np.diff(dst_bins)/2]).T[:,0]
print(dst_bins_centers, dst_bins_centers.shape)
n_events_dig = np.digitize(dst, dst_bins)
print(n_events_dig)
n_events = np.zeros(len(dst_bins)-1)
success = np.zeros_like(n_events)
for i, v in enumerate(np.unique(n_events_dig)):
n_events[v-1] = np.sum(n_events_dig==v)
success[v-1] = np.sum(minL[n_events_dig==v] <= 2.4)
print(n_events)
print(success)
In [40]:
plt.plot(dst, minL, 'o')
plt.xlim((25, 250))
plt.ylim((2.2, 4.0))
plt.xlabel('Min Dst')
plt.ylabel('Min L Shell')
Out[40]:
In [41]:
plt.plot(dst, minL, 'o')
plt.xlim((25, 250))
plt.ylim((2.2, 4.0))
plt.xlabel('Min Dst')
plt.ylabel('Min L Shell')
for v in dst_bins:
plt.axvline(v, lw=0.25)
plt.axhline(2.4, c='r', lw=1)
Out[41]:
Setup the Bayesian model accorind to the description above. Run the Markov chain monte carlo (MCMC) to sample the posterior distributions for $\alpha$ and $\beta$
In [42]:
ind = n_events > 0
for i, j in zip(success[ind], n_events[ind]):
print(i,j)
In [69]:
# define priors
# these are wide uninformative priors
# alpha = pymc.Normal('alpha', mu=0, tau=1.0/5**2)
# beta = pymc.Normal('beta', mu=0, tau=1.0/10**2)
with pymc3.Model() as model:
alpha = pymc3.Uniform('alpha', -100, 100)
beta = pymc3.Uniform('beta', -100, 100)
# cannot feed in zero events
ind = n_events > 0
# define likelihood
p = pymc3.math.invlogit(alpha + beta*dst_bins_centers[ind])
y = pymc3.Binomial('y_obs', n=n_events[ind], p=p, observed=success[ind])
step = pymc3.Metropolis([alpha, beta])
trace = pymc3.sample(10000, njobs=5, tune=5000, step=step)
Make diagnostic plots of the posteriour distributions as created using MCMC.
In [70]:
pymc3.traceplot(trace, combined=True)
Out[70]:
In [71]:
pymc3.summary(trace)
Out[71]:
In [72]:
xp = np.linspace(dst_bins_centers[ind].min(), dst_bins_centers[ind].max(), 100)
a = trace['alpha'].mean()
b = trace['beta'].mean()
y_val = pymc3.math.invlogit(a + b*xp).eval()
plt.plot(xp, y_val)
plt.scatter(dst_bins_centers[ind], success[ind]/n_events[ind], s=50);
plt.xlabel('Minimum Dst')
plt.ylabel('Probability slot is filled')
plt.gca().ticklabel_format(useOffset=False)
In [73]:
# get the minimum Dst where 99% should be successes
for percentage in [50,75,90,95,99]:
ind99 = y_val >= percentage/100
minDst99 = xp[ind99][0]
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a {1}% percent of a slot filling at this energy'.format(minDst99, percentage))
In [ ]:
In [78]:
pymc3.math.invlogit(trace['alpha'][4] + trace['beta'][4]*xp).eval()
Out[78]:
In [89]:
Out[89]:
In [90]:
# one should be able to get estimates of the line uncertainity
ilu = np.empty((1000, len(xp)), dtype=float)
ilu = []
for ii, v in tqdm.tqdm(enumerate(np.random.randint(0, len(trace['alpha']), 100)), total=100):
if trace['alpha'][v] == 0:
continue
if trace['beta'][v] == 0:
continue
ilu.append(pymc3.math.invlogit(trace['alpha'][v] + trace['beta'][v]*xp).eval())
ilu=np.asarray(ilu)
ilu.shape
Out[90]:
In [95]:
plt.figure(figsize=(8,5))
xp = np.linspace(dst_bins_centers[ind].min(), dst_bins_centers[ind].max(), 100)
for v in ilu:
plt.plot(xp, v, alpha=.3, c='r')
plt.plot(xp, np.percentile(ilu, [25, 50, 75],axis=0).T, c='k', lw=3)
# a = trace['alpha'].mean()
# b = trace['beta'].mean()
# plt.plot(xp, invlogit(a + b*xp).value)
# a = trace['alpha'].stats()['quantiles'][50]
# b = trace['beta'].stats()['quantiles'][50]
# plt.plot(xp, invlogit(a + b*xp).value, c='y')
plt.scatter(dst_bins_centers[ind], success[ind]/n_events[ind], s=50);
plt.xlabel('Minimum Dst')
plt.ylabel('Probability slot is filled')
Out[95]:
Same as previous figure with the red lines overlayed as 100 joint draws from the model posterior in order to show spread.
In [98]:
xp.shape, np.percentile(ilu, [25, 50, 75],axis=0).T.shape
Out[98]:
In [109]:
ii50 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.5)[0]
ii75 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.75)[0]
ii90 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.90)[0]
ii95 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.95)[0]
ii99 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.99)[0]
In [110]:
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 50% percent of a slot filling at this energy'.format(xp[ii50[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 75% percent of a slot filling at this energy'.format(xp[ii75[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 90% percent of a slot filling at this energy'.format(xp[ii90[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 95% percent of a slot filling at this energy'.format(xp[ii95[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 99% percent of a slot filling at this energy'.format(xp[ii99[0]]))
In [111]:
ii50 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.5)[0]
ii75 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.75)[0]
ii90 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.90)[0]
ii95 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.95)[0]
ii99 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.99)[0]
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 50% percent of a slot filling at this energy'.format(xp[ii50[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 75% percent of a slot filling at this energy'.format(xp[ii75[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 90% percent of a slot filling at this energy'.format(xp[ii90[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 95% percent of a slot filling at this energy'.format(xp[ii95[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 99% percent of a slot filling at this energy'.format(xp[ii99[0]]))
In [112]:
ii50 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.5)[0]
ii75 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.75)[0]
ii90 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.90)[0]
ii95 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.95)[0]
ii99 = np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.99)[0]
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 50% percent of a slot filling at this energy'.format(xp[ii50[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 75% percent of a slot filling at this energy'.format(xp[ii75[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 90% percent of a slot filling at this energy'.format(xp[ii90[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 95% percent of a slot filling at this energy'.format(xp[ii95[0]]))
print('At a minimum Dst of {0:0.0f}nT it is predicted to have a 99% percent of a slot filling at this energy'.format(xp[ii99[0]]))
In [116]:
v50 = [xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.5)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.5)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.5)[0][0]]]
v75 = [xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.75)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.75)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.75)[0][0]]]
v90 = [xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.90)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.90)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.90)[0][0]]]
v95 = [xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.95)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.95)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.95)[0][0]]]
v99 = [xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,2] > 0.99)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,1] > 0.99)[0][0]],
xp[np.where(np.percentile(ilu, [25, 50, 75],axis=0).T[:,0] > 0.99)[0][0]]]
In [117]:
v50, v75, v90, v95, v99
Out[117]:
In [129]:
print("a 50% probability of slot filling for {0:.0f}<dst<{1:.0f}".format(v50[0], v50[2]))
print("a 75% probability of slot filling for {0:.0f}<dst<{1:.0f}".format(v75[0], v75[2]))
print("a 90% probability of slot filling for {0:.0f}<dst<{1:.0f}".format(v90[0], v90[2]))
print("a 95% probability of slot filling for {0:.0f}<dst<{1:.0f}".format(v95[0], v95[2]))
print("a 99% probability of slot filling for {0:.0f}<dst<{1:.0f}".format(v99[0], v99[2]))
In [124]:
Out[124]:
In [ ]: