pymc3_examples



In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Intialize random number generator
np.random.seed(123)

# True parameter values
alpha, sigma = 1, 1
beta = [1, 2.5]

# Size of dataset
size = 100

# Predictor variable
X1 = np.linspace(0, 1, size)
X2 = np.linspace(0,.2, size)

# Simulate outcome variable
Y = alpha + beta[0]*X1 + beta[1]*X2 + np.random.randn(size)*sigma

In [2]:
%matplotlib inline

fig, axes = plt.subplots(1, 2, sharex=True, figsize=(10,4))
axes[0].scatter(X1, Y)
axes[1].scatter(X2, Y)
axes[0].set_ylabel('Y'); axes[0].set_xlabel('X1'); axes[1].set_xlabel('X2');



In [3]:
from pymc3 import Model, Normal, HalfNormal

In [4]:
basic_model = Model()

with basic_model:

    # Priors for unknown model parameters
    alpha = Normal('alpha', mu=0, sd=10)
    beta = Normal('beta', mu=0, sd=10, shape=2)
    sigma = HalfNormal('sigma', sd=1)

    # Expected value of outcome
    mu = alpha + beta[0]*X1 + beta[1]*X2

    # Likelihood (sampling distribution) of observations
    Y_obs = Normal('Y_obs', mu=mu, sd=sigma, observed=Y)

In [22]:
basic_model = Model()

with basic_model:

    # Priors for unknown model parameters
    alpha = Normal('alpha', mu=0, sd=10)
    beta = Normal('beta', mu=0, sd=10, shape=2)
    sigma = HalfNormal('sigma', sd=1)

    # Expected value of outcome
    mu = alpha + beta[0]*X1 + beta[1]*X2

    # Likelihood (sampling distribution) of observations
    Y = Normal('Y_obs', mu=mu, sd=sigma)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-22-c25c4c7ed7fb> in <module>()
     12 
     13     # Likelihood (sampling distribution) of observations
---> 14     Y = Normal('Y_obs', mu=mu, sd=sigma)

/home/downey/anaconda/lib/python2.7/site-packages/pymc3/distributions/distribution.pyc in __new__(cls, name, *args, **kwargs)
     20             data = kwargs.pop('observed', None)
     21             dist = cls.dist(*args, **kwargs)
---> 22             return model.Var(name, dist, data)
     23         elif name is None:
     24             return object.__new__(cls) #for pickle

/home/downey/anaconda/lib/python2.7/site-packages/pymc3/model.pyc in Var(self, name, dist, data)
    224         if data is None:
    225             if getattr(dist, "transform", None) is None:
--> 226                 var = FreeRV(name=name, distribution=dist, model=self)
    227                 self.free_RVs.append(var)
    228             else:

/home/downey/anaconda/lib/python2.7/site-packages/pymc3/model.pyc in __init__(self, type, owner, index, name, distribution, model)
    436             self.tag.test_value = np.ones(
    437                 distribution.shape, distribution.dtype) * distribution.default()
--> 438             self.logp_elemwiset = distribution.logp(self)
    439             self.model = model
    440 

/home/downey/anaconda/lib/python2.7/site-packages/pymc3/distributions/continuous.pyc in logp(self, value)
    171 
    172         return bound(
--> 173             (-tau * (value - mu) ** 2 + log(tau / pi / 2.)) / 2.,
    174             tau > 0,
    175             sd > 0

/home/downey/anaconda/lib/python2.7/site-packages/theano/tensor/var.pyc in __sub__(self, other)
    145         # and the return value in that case
    146         try:
--> 147             return theano.tensor.basic.sub(self, other)
    148         except (NotImplementedError, AsTensorError):
    149             return NotImplemented

/home/downey/anaconda/lib/python2.7/site-packages/theano/gof/op.pyc in __call__(self, *inputs, **kwargs)
    505         """
    506         return_list = kwargs.pop('return_list', False)
--> 507         node = self.make_node(*inputs, **kwargs)
    508 
    509         if config.compute_test_value != 'off':

/home/downey/anaconda/lib/python2.7/site-packages/theano/tensor/elemwise.pyc in make_node(self, *inputs)
    543                     input.type.broadcastable,
    544                     ['x'] * difference + range(length),
--> 545                     inplace=False)(input))
    546         inputs = args
    547 

/home/downey/anaconda/lib/python2.7/site-packages/theano/gof/op.pyc in __call__(self, *inputs, **kwargs)
    515             for i, ins in enumerate(node.inputs):
    516                 try:
--> 517                     storage_map[ins] = [self._get_test_value(ins)]
    518                     compute_map[ins] = [True]
    519                 except AttributeError:

/home/downey/anaconda/lib/python2.7/site-packages/theano/gof/op.pyc in _get_test_value(cls, v)
    452             # ensure that the test value is correct
    453             try:
--> 454                 ret = v.type.filter(v.tag.test_value)
    455             except Exception, e:
    456                 # Better error message.

/home/downey/anaconda/lib/python2.7/site-packages/theano/tensor/type.pyc in filter(self, data, strict, allow_downcast)
    167             raise TypeError("Wrong number of dimensions: expected %s,"
    168                             " got %s with shape %s." % (self.ndim, data.ndim,
--> 169                                                         data.shape))
    170         if not data.flags.aligned:
    171             try:

TypeError: For compute_test_value, one input test value does not have the requested type.

The error when converting the test value to that variable type:
Wrong number of dimensions: expected 0, got 1 with shape (100,).

In [23]:
alpha = Normal('alpha', mu=0, sd=10)
beta = Normal('beta', mu=0, sd=10, shape=2)
sigma = HalfNormal('sigma', sd=1)

# Expected value of outcome
mu = alpha + beta[0]*X1 + beta[1]*X2

# Likelihood (sampling distribution) of observations
Y = Normal('Y_obs', mu=mu, sd=sigma)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-23-0fffde8373ce> in <module>()
----> 1 alpha = Normal('alpha', mu=0, sd=10)
      2 beta = Normal('beta', mu=0, sd=10, shape=2)
      3 sigma = HalfNormal('sigma', sd=1)
      4 
      5 # Expected value of outcome

/home/downey/anaconda/lib/python2.7/site-packages/pymc3/distributions/distribution.pyc in __new__(cls, name, *args, **kwargs)
     15             model = Model.get_context()
     16         except TypeError:
---> 17             raise TypeError("No model on context stack, which is needed to use the Normal('x', 0,1) syntax. Add a 'with model:' block")
     18 
     19         if isinstance(name, str):

TypeError: No model on context stack, which is needed to use the Normal('x', 0,1) syntax. Add a 'with model:' block

In [5]:
from pymc3 import find_MAP

map_estimate = find_MAP(model=basic_model)

print(map_estimate)


{'alpha': array(1.01366409951285), 'beta': array([ 1.46791595,  0.29358319]), 'sigma_log': array(0.11928764983495602)}

In [7]:
from scipy import optimize
from pymc3 import NUTS, sample

with basic_model:

    # obtain starting values via MAP
    start = find_MAP(fmin=optimize.fmin_powell)

    # instantiate sampler
    step = NUTS(scaling=start)

    # draw 2000 posterior samples
    trace = sample(2000, step, start=start)


 [-----------------100%-----------------] 2000 of 2000 complete in 5.4 sec
/home/downey/anaconda/lib/python2.7/site-packages/theano/gof/cmodule.py:293: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  rval = __import__(module_name, {}, {}, [module_name])

In [8]:
from pymc3 import traceplot

traceplot(trace);



In [9]:
from pymc3 import summary

summary(trace)


alpha:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.999            0.234            0.007            [0.535, 1.424]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.543          0.844          1.002          1.153          1.454


beta:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  1.638            2.107            0.128            [-2.402, 5.976]
  -0.436           10.226           0.622            [-21.629, 19.058]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -2.333         0.188          1.580          2.918          6.062
  -21.700        -6.718         -0.462         6.751          19.007


sigma_log:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  0.141            0.077            0.002            [-0.004, 0.293]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  -0.001         0.087          0.140          0.191          0.300


sigma:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  1.155            0.089            0.002            [0.989, 1.332]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  0.999          1.091          1.150          1.210          1.349


In [13]:
type(basic_model)


Out[13]:
pymc3.model.Model

In [14]:
type(trace)


Out[14]:
pymc3.backends.base.MultiTrace

In [15]:
dir(trace)


Out[15]:
['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__hash__',
 '__init__',
 '__len__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attrs',
 '_slice',
 '_straces',
 'chains',
 'get_values',
 'nchains',
 'point',
 'varnames']

In [17]:
trace.varnames


Out[17]:
['alpha', 'beta', 'sigma_log', 'sigma']

In [18]:
trace.get_values('alpha')


Out[18]:
array([ 1.00911214,  1.00911214,  1.0349891 , ...,  1.17301994,
        1.01888131,  0.94337901])

In [19]:
dir(basic_model)


Out[19]:
['Var',
 'Y_obs',
 '__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__enter__',
 '__exit__',
 '__format__',
 '__getattribute__',
 '__getitem__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'add_random_variable',
 'alpha',
 'basic_RVs',
 'beta',
 'cont_vars',
 'contexts',
 'd2logp',
 'deterministics',
 'disc_vars',
 'dlogp',
 'fastd2logp',
 'fastdlogp',
 'fastfn',
 'fastlogp',
 'fn',
 'free_RVs',
 'get_context',
 'get_contexts',
 'logp',
 'logp_elemwise',
 'logpt',
 'makefn',
 'missing_values',
 'model',
 'named_vars',
 'observed_RVs',
 'potentials',
 'profile',
 'sigma_log',
 'test_point',
 'unobserved_RVs',
 'vars']

In [20]:
basic_model.observed_RVs


Out[20]:
[Y_obs]

In [21]:
basic_model.unobserved_RVs


Out[21]:
[alpha, beta, sigma_log, sigma]

In [ ]: