In [1]:
import numpy as np, pandas as pd, dismod_mr, pymc as pm, matplotlib.pyplot as plt, seaborn as sns
%matplotlib inline
In [2]:
# set a random seed to ensure reproducible simulation results
np.random.seed(123456)
In [3]:
# simulate data
n = 20
data = dict(age=np.random.randint(0, 10, size=n)*10,
year=np.random.randint(1990, 2010, size=n))
data = pd.DataFrame(data)
data['value'] = (.1 + .001 * data.age) + np.random.normal(0., .01, size=n)
data['data_type'] = 'p'
data['age_start'] = data.age
data['age_end'] = data.age+10
# for prettier display, include jittered age near midpoint of age interval
data['jittered_age'] = .5*(data.age_start + data.age_end) + np.random.normal(size=n)
# keep things simple, no spatial random effects, no sex effect
data['area'] = 'all'
data['sex'] = 'total'
# quantification of uncertainty that says these numbers are believed to be quite precise
data['standard_error'] = -99
data['upper_ci'] = np.nan
data['lower_ci'] = np.nan
data['effective_sample_size'] = 1.e8
def new_model(data):
# build the dismod_mr model
dm = dismod_mr.data.ModelData()
# set simple model parameters, for decent, fast computation
dm.set_knots('p', [0,100])
dm.set_level_bounds('p', lower=0, upper=1)
dm.set_level_value('p', age_before=0, age_after=100, value=0)
dm.set_heterogeneity('p', value='Slightly')
dm.set_effect_prior('p', cov='x_sex', value=dict(dist='Constant', mu=0))
# copy data into model
dm.input_data = data.copy()
return dm
In [4]:
dm1 = new_model(data)
dm1.setup_model('p', rate_model='neg_binom')
%time dm1.fit(how='mcmc', iter=10, burn=0, thin=1)
dm1.plot()
Fitting it again gives a different answer:
In [5]:
dm2 = new_model(data)
dm2.setup_model('p', rate_model='neg_binom')
%time dm2.fit(how='mcmc', iter=10, burn=0, thin=1)
dm2.plot()
In [6]:
dm1.vars['p']['gamma'][1].trace().mean()
Out[6]:
In [7]:
dm2.vars['p']['gamma'][1].trace().mean()
Out[7]:
In [8]:
dm1 = new_model(data)
dm1.setup_model('p', rate_model='neg_binom')
%time dm1.fit(how='mcmc', iter=10_000, burn=5_000, thin=5)
In [9]:
dm2 = new_model(data)
dm2.setup_model('p', rate_model='neg_binom')
%time dm2.fit(how='mcmc', iter=10_000, burn=5_000, thin=5)
In [10]:
dm1.vars['p']['gamma'][1].trace().mean()
Out[10]:
In [11]:
dm2.vars['p']['gamma'][1].trace().mean()
Out[11]:
In [12]:
dismod_mr.plot.plot_trace(dm1)
In [13]:
dismod_mr.plot.plot_acorr(dm1)
In [14]:
# setup a model and run the chain once
dm = new_model(data)
dm.setup_model('p', rate_model='neg_binom')
%time dm.fit(how='mcmc', iter=2_000, burn=1_000, thin=1)
In [15]:
# to run it more times, use the sample method of the dm.mcmc object
# use the same iter/burn/thin settings for future convenience
for i in range(4):
dm.mcmc.sample(iter=2_000, burn=1_000, thin=1)
In [16]:
# calculate Gelman-Rubin statistic for all model variables
R_hat = pm.gelman_rubin(dm.mcmc)
# examine for gamma_p_100
R_hat['gamma_p_100']
Out[16]:
In [17]:
!date