Also look to: Adams RP, Murray I, MacKay DJC. Tractable nonparametric Bayesian inference in Poisson processes with Gaussian process intensities. In: Proceedings of the 26th Annual International Conference on Machine Learning (ICML); Montreal, Quebec, Canada. ACM; 2009. p. 9-16.
Some thoughts 20171018
In [1]:
%matplotlib inline
from pprint import pprint
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as mc
import spacepy.toolbox as tb
import spacepy.plot as spp
import tqdm
from scipy import stats
import seaborn as sns
sns.set(font_scale=1.5)
# matplotlib.pyplot.rc('figure', figsize=(10,10))
# matplotlib.pyplot.rc('lines', lw=3)
# matplotlib.pyplot.rc('font', size=20)
For each unit interval, draw the number of events $n$ from a Poisson distribution, then draw the location of each event within the interval from a uniform distribution.
In [2]:
np.random.seed(8675309)
nT = 400
cts = np.random.poisson(20, size=nT)
edata = []
for i in range(nT):
    edata.extend(i + np.sort(np.random.uniform(low=0, high=1, size=cts[i])))
edata = np.asarray(edata)
edata.shape
Out[2]:
In [3]:
plt.plot(edata, np.arange(len(edata)))
plt.xlabel('Time of event')
plt.ylabel('Event number')
plt.title("Modeled underlying data")
Out[3]:
In [4]:
with mc.Model() as model:
    lam = mc.Uniform('lambda', 0, 1000)  # rate parameter of the exponential inter-event times
    meas = mc.Exponential('meas', lam, observed=np.diff(edata))
    lam2 = mc.Uniform('lam2', 0, 1000)  # rate parameter of the per-interval Poisson counts
    poi = mc.Poisson('Poisson', lam2, observed=cts)
    start = mc.find_MAP()
    trace = mc.sample(10000, start=start, njobs=8)
In [5]:
mc.traceplot(trace, combined=True, lines={'lambda':20, 'lam2':20})
mc.summary(trace)
In [6]:
fig, ax = plt.subplots(ncols=1, nrows=2, sharex=True)
sns.distplot(trace['lambda'], ax=ax[0])
sns.distplot(trace['lam2'], ax=ax[1])
plt.xlabel('Lambda')
ax[0].set_ylabel('Exp')
ax[1].set_ylabel('Poisson')
ax[0].axvline(20, c='r', lw=1)
ax[1].axvline(20, c='r', lw=1)
plt.tight_layout()
This is consistent with a Poisson of parameter 20! But there seems to be a slight under-prediction going on; wonder why?
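As a quick point-estimate sanity check (a minimal sketch, not part of the original analysis), compare the empirical estimates both models should recover against the true rate of 20:
# empirical Poisson estimate (mean count per interval) vs.
# empirical exponential estimate (reciprocal of the mean inter-event gap)
print('Poisson estimate:', cts.mean())
print('Exponential estimate:', 1 / np.diff(edata).mean())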
Go through posterior predictive checks (http://docs.pymc.io/notebooks/posterior_predictive.html) and see if we are reproducing the mean and variance.
In [7]:
ppc = mc.sample_ppc(trace, samples=500, model=model, size=100)
In [8]:
ax = plt.subplot()
sns.distplot([n.mean() for n in ppc['Poisson']], kde=False, ax=ax)
ax.axvline(cts.mean())
ax.set(title='Posterior predictive of the mean (Poisson)', xlabel='mean(x)', ylabel='Frequency');
In [9]:
ax = plt.subplot()
sns.distplot([n.var() for n in ppc['Poisson']], kde=False, ax=ax)
ax.axvline(cts.var())
ax.set(title='Posterior predictive of the variance (Poisson)', xlabel='var(x)', ylabel='Frequency');
In [10]:
ax = plt.subplot()
sns.distplot([n.mean() for n in ppc['meas']], kde=False, ax=ax)
ax.axvline(np.diff(edata).mean())
ax.set(title='Posterior predictive of the mean (Exponential)', xlabel='mean(x)', ylabel='Frequency');
In [11]:
ax = plt.subplot()
sns.distplot([n.var() for n in ppc['meas']], kde=False, ax=ax)
ax.axvline(np.diff(edata).var())
ax.set(title='Posterior predictive of the variance (Exponential)', xlabel='var(x)', ylabel='Frequency');
We are reproducing both the mean and the variance well.
The dead time correction should look like $n_1 = \frac{R_1}{1 - R_1 \tau}$, where $n_1$ is the real rate, $R_1$ is the observed rate, and $\tau$ is the dead time.
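A minimal sketch of that correction as a helper function (the name dead_time_correct is ours, not part of the notebook):
def dead_time_correct(R1, tau):
    """Non-paralyzable dead time correction: n1 = R1 / (1 - R1*tau).
    Only valid while R1*tau < 1; the correction diverges as R1*tau -> 1."""
    R1 = np.asarray(R1, dtype=float)
    return R1 / (1 - R1 * tau)

# e.g. an observed rate of 18.2 with tau = 0.005 corrects back to ~20
print(dead_time_correct(18.2, 0.005))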
Take edata from above and step through it from beginning to end, keeping only the points that are at least a dead time away from the previously kept point.
In [12]:
deadtime1 = 0.005  # small dead time
deadtime2 = 0.1    # large dead time
edata_td1 = []
edata_td1.append(edata[0])
edata_td2 = []
edata_td2.append(edata[0])
for ii, v in enumerate(edata[1:], 1):  # skip the first point (already kept), start enumerate at 1
    if v - edata_td1[-1] >= deadtime1:
        edata_td1.append(v)
    if v - edata_td2[-1] >= deadtime2:
        edata_td2.append(v)
edata_td1 = np.asarray(edata_td1)
edata_td2 = np.asarray(edata_td2)
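A quick property check (our addition): after filtering, no surviving gap should be shorter than the corresponding dead time.
# every surviving inter-event gap must be at least the dead time
assert np.diff(edata_td1).min() >= deadtime1
assert np.diff(edata_td2).min() >= deadtime2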
In [13]:
plt.figure(figsize=(8,6))
plt.plot(edata, np.arange(len(edata)), label='Real data')
plt.plot(edata_td1, np.arange(len(edata_td1)), label='Small dead time')
plt.plot(edata_td2, np.arange(len(edata_td2)), label='Large dead time')
plt.xlabel('Time of event')
plt.ylabel('Event number')
plt.title("Modeled underlying data")
plt.legend(bbox_to_anchor=(1, 1))
Out[13]:
In [14]:
plt.figure(figsize=(8,6))
h1, b1 = np.histogram(edata, np.arange(1000))
plt.plot(tb.bin_edges_to_center(b1), h1, label='Real data', c='k')
h2, b2 = np.histogram(edata_td1, np.arange(1000))
plt.plot(tb.bin_edges_to_center(b2), h2, label='Small dead time', c='r')
h3, b3 = np.histogram(edata_td2, np.arange(1000))
plt.plot(tb.bin_edges_to_center(b3), h3, label='Large dead time')
plt.legend(bbox_to_anchor=(1, 1))
plt.xlim((0,400))
plt.ylabel('Rate')
plt.xlabel('Time')
Out[14]:
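For reference, the saturation visible in the large dead time curve matches the standard non-paralyzable dead time result $R_1 = n_1/(1 + n_1\tau)$; a quick check (our addition) using the known true rate:
# expected observed rates for a non-paralyzable detector: R1 = n1/(1 + n1*tau)
n1 = 20.0  # the true rate used to generate edata
for tau in (deadtime1, deadtime2):
    print('tau = {}: expected observed rate = {:.2f}'.format(tau, n1 / (1 + n1 * tau)))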
In [15]:
# assume R1 is Poisson
with mc.Model() as model:
    tau = deadtime1
    obsRate = mc.Uniform('obsRate', 0, 1000, shape=1)
    obsData = mc.Poisson('obsData', obsRate, observed=h2[:400], shape=1)
    realRate = mc.Deterministic('realRate', obsData/(1-obsData*tau))
    start = mc.find_MAP()
    trace = mc.sample(10000, start=start, njobs=8)
In [16]:
mc.traceplot(trace, combined=True, varnames=('obsRate', ))
mc.summary(trace, varnames=('obsRate', ))
In [17]:
sns.distplot(trace['realRate'].mean(axis=0), bins=10)
plt.xlabel('realRate')
plt.ylabel('Density')
dt1_bounds = np.percentile(trace['realRate'], (2.5, 50, 97.5))
print('The estimate of the real rate given that we know the dead time is:', dt1_bounds,
(dt1_bounds[2]-dt1_bounds[0])/dt1_bounds[1])
dat_bounds = np.percentile(h1[:400], (2.5, 50, 97.5))
print("This compares with if we measured without dead time as:", dat_bounds,
(dat_bounds[2]-dat_bounds[0])/dat_bounds[1])
In [18]:
# assume R1 is Poisson
with mc.Model() as model:
    tau = deadtime2
    obsRate = mc.Uniform('obsRate', 0, 1000)
    obsData = mc.Poisson('obsData', obsRate, observed=h3[:400])
    realRate = mc.Deterministic('realRate', obsData/(1-obsData*tau))
    start = mc.find_MAP()
    trace = mc.sample(10000, start=start, njobs=8)
In [19]:
mc.traceplot(trace, combined=True, varnames=('obsRate', ))
mc.summary(trace, varnames=('obsRate', ))
In [20]:
sns.distplot(trace['realRate'].mean(axis=0))
plt.xlabel('realRate')
plt.ylabel('Density')
dt2_bounds = np.percentile(trace['realRate'], (2.5, 50, 97.5))
print('The estimate of the real rate given that we know the dead time is:', dt2_bounds,
      (dt2_bounds[2]-dt2_bounds[0])/dt2_bounds[1])
dat_bounds = np.percentile(h1[:400], (2.5, 50, 97.5))
print("This compares with if we measured without dead time as:", dat_bounds,
(dat_bounds[2]-dat_bounds[0])/dat_bounds[1])
But this is totally broken! With $\tau = 0.1$ the observed counts saturate near $1/\tau = 10$ per bin, so many bins have $R_1 \tau \ge 1$ and the per-bin correction $R_1/(1 - R_1\tau)$ diverges or goes negative.
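A quick numeric illustration (our addition, using the correction formula from above): the formula applied to the mean observed rate is fine; it is the per-bin application to noisy integer counts that hits the singularity.
R1 = h3[:400]
# bins whose counts reach 1/tau make the denominator 1 - R1*tau zero or negative
print('bins with R1*tau >= 1:', (R1 * deadtime2 >= 1).sum(), 'of', len(R1))
# applied to the mean rate instead, the correction recovers ~20
print('mean-rate correction:', R1.mean() / (1 - R1.mean() * deadtime2))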
In [21]:
real = pd.Series(edata)
td1 = pd.Series(edata_td1)
td2 = pd.Series(edata_td2)
real.to_csv('no_deadtime_times.csv')
td1.to_csv('small_deadtime_times.csv')
td2.to_csv('large_deadtime_times.csv')
In [22]:
real = pd.Series(h1[h1>0])
td1 = pd.Series(h2[h2>0])
td2 = pd.Series(h3[h3>0])
real.to_csv('no_deadtime_rates.csv')
td1.to_csv('small_deadtime_rates.csv')
td2.to_csv('large_deadtime_rates.csv')
In [23]:
with mc.Model() as model:
    BoundedExp = mc.Bound(mc.Exponential, lower=deadtime2, upper=None)
    # we observe the following time between counts
    lam = mc.Uniform('lam', 0, 1000)
    time_between = BoundedExp('tb_ob', lam, observed=np.diff(edata_td2))
    start = mc.find_MAP()
    trace = mc.sample(10000, njobs=8, start=start)
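As a cross-check on this bounded exponential idea (a sketch, assuming each recorded gap is roughly the dead time plus an exponential wait at the true rate), a simple moment estimate should also land near 20:
# moment estimate: true rate ~ 1/(mean observed gap - tau)
gaps = np.diff(edata_td2)
print('moment estimate of the true rate:', 1 / (gaps.mean() - deadtime2))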