In [1]:

    
%matplotlib inline

from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc
import spacepy.toolbox as tb
import spacepy.plot as spp









    



This unreleased version of SpacePy is not supported by the SpacePy team.






    



/Users/balarsen/miniconda3/envs/python3/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))

Setup some test data to use

This is from a Poisson dist with mean 50



In [9]:

    
np.random.seed(8675309)
test_data_dist = pymc.Poisson('Test_Data', mu=50)

test_data = [test_data_dist.random() for v in range(10000)]
# print(d_tmp)
plt.hist(test_data, 10)









    Out[9]:





(array([    9.,   161.,   652.,  2301.,  2803.,  2249.,  1374.,   351.,
           88.,    12.]),
 array([ 25. ,  30.3,  35.6,  40.9,  46.2,  51.5,  56.8,  62.1,  67.4,
         72.7,  78. ]),
 <a list of 10 Patch objects>)



In [10]:

    
rate = pymc.Uniform('mean', lower=0, upper=1000)
data = pymc.Poisson('Data', mu=rate, observed=True, value=test_data[::100])

model = pymc.MCMC((rate, data))



In [11]:

    
model.sample(iter=10000, burn=1000, verbose=0, progress_bar=1, burn_till_tuned=True, thin=10)









    



 [------------------------------------201%------------------------------------] 28143 of 14000 complete in 1.0 sec



In [12]:

    
pprint(model.stats())
pymc.Matplot.plot(model)









    



{'mean': {'95% HPD interval': array([ 48.99394806,  51.62555978]),
          'mc error': 0.021721506216243316,
          'mean': 50.407994540338031,
          'n': 900,
          'quantiles': {2.5: 49.094770605574844,
                        25: 49.932985495720011,
                        50: 50.402570205060314,
                        75: 50.904732180966597,
                        97.5: 51.771866267555204},
          'standard deviation': 0.67507229773265665}}
Plotting mean



In [13]:

    
ans = []

for n_iter in tb.logspace(1e1, 1e6, 10):
    print('%%%%%%%', n_iter)
    del model
    model = pymc.MCMC((rate, data))
    model.sample(iter=10000, burn=1000, verbose=0, thin=10, progress_bar=1, burn_till_tuned=True)
    ans.append((n_iter, model.stats()))









    



%%%%%%% 10.0
 [-----------------100%-----------------] 14000 of 14000 complete in 0.5 sec%%%%%%% 35.938136638
 [-----------------100%-----------------] 14096 of 14000 complete in 0.5 sec%%%%%%% 129.154966501
 [-----------------------------------198%------------------------------------] 27829 of 14000 complete in 1.0 sec%%%%%%% 464.158883361
 [-----------------------------------199%------------------------------------] 27993 of 14000 complete in 1.0 sec%%%%%%% 1668.1005372
 [-----------------101%-----------------] 14202 of 14000 complete in 0.5 sec%%%%%%% 5994.84250319
 [------------------------------------200%------------------------------------] 28104 of 14000 complete in 1.0 sec%%%%%%% 21544.3469003
 [-----------------------------------199%------------------------------------] 27952 of 14000 complete in 1.0 sec%%%%%%% 77426.3682681
 [-----------------101%-----------------] 14183 of 14000 complete in 0.5 sec%%%%%%% 278255.940221
 [-----------------------------------196%-----------------------------------] 27481 of 14000 complete in 1.0 sec%%%%%%% 1000000.0
 [-----------------101%-----------------] 14171 of 14000 complete in 0.5 sec



In [19]:

    
n_iter = [v[0] for v in ans]
plt_range = [v[1]['mean']['95% HPD interval'] for v in ans]
plt.figure()
plt.loglog(n_iter, plt_range)
plt.ylim((4e1, 6e1))
plt.loglog(n_iter, [v[1]['mean']['mean'] for v in ans], label='mean')
plt.legend()
# plt.gca().locator_params(axis='y',nbins=10)
plt.yscale('linear')

now what does a coincidence do?



In [ ]: