Assume you have a variable Y that is normally distributed, Y ~ N(mu, var), where "~" means "is distributed as", mu is the expected value, and var is the variance of the instrument error (sigma, the standard deviation, is its square root).
Then assume that mu is a linear function of the independent variables X1 and X2, with unknown parameters alpha, beta1, and beta2, i.e. mu = alpha + beta1*X1 + beta2*X2.
For this model, assume weakly informative (nearly uninformative) priors on the parameters.
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy import optimize
%matplotlib inline
In [2]:
# Fix the RNG seed so every run of the notebook draws the same synthetic data.
np.random.seed(seed=123)
In [3]:
# Ground-truth parameter values used to generate the synthetic data.
alpha = 1
sigma = 1
beta = [1, 2.5]
In [4]:
# Synthetic predictor variables.
n = 100  # number of observations
X1 = np.random.randn(n)        # standard-normal draws
X2 = np.random.randn(n) * 0.2  # standard-normal draws scaled to sd 0.2
In [5]:
# Simulated observations: the linear predictor plus Gaussian measurement noise.
noise = np.random.randn(n) * sigma
Y = alpha + beta[0] * X1 + beta[1] * X2 + noise
In [6]:
# Scatter Y against each predictor side by side.
fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, figsize=(10, 4))
ax1.scatter(X1, Y)
ax2.scatter(X2, Y)
ax1.set_ylabel('Y (observation)')
ax1.set_xlabel('X1 (independent var)')
ax2.set_xlabel('X2 (independent var)');
In [7]:
# Build the Bayesian linear-regression model.
basic_model = pm.Model()

# Everything declared inside this context manager is registered with basic_model.
with basic_model:
    # Weakly informative priors on the unknown parameters (stochastic variables).
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10, shape=2)
    sigma = pm.HalfNormal('sigma', sd=1)

    # Expected value of the outcome. This node is deterministic: a fixed
    # function of its (stochastic) parents, not a source of randomness itself.
    # Do not confuse it with the normal distribution modeling the uncertainty.
    mu = alpha + beta[0] * X1 + beta[1] * X2

    # Likelihood (sampling distribution) of the observations. The observed
    # data Y is attached here and is never modified by the sampler.
    Y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=Y)
In [8]:
# Maximum a posteriori (MAP) point estimate of the model parameters.
with basic_model:
    map_estimate = pm.find_MAP()
map_estimate
Out[8]:
In [9]:
with basic_model:
    # Draw 500 posterior samples using the NUTS sampler.
    trace = pm.sample(500)
In [10]:
# Posterior draws for the two regression coefficients (shape: n_draws x 2, per shape=2 above).
trace['beta']
Out[10]:
In [11]:
# Plot the marginal posterior density and the sampling trace for each parameter
# (assigned to _ so the notebook does not also echo the returned axes).
_ = pm.traceplot(trace)
In [12]:
# Tabulated summary statistics of the posterior samples for each parameter.
pm.summary(trace)
Out[12]:
In [ ]: