Starting from the PyMC3 notebook at https://docs.pymc.io/notebooks/GLM-linear.html
In [1]:
%matplotlib inline
from pymc3 import *
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Apply seaborn's plot settings with the font scale raised to 1.5.
sns.set(font_scale=1.5)
In [2]:
# Ground-truth parameters of the line y = intercept + slope * x.
size = 200
true_intercept = 1
true_slope = 2

# Evenly spaced predictor values on [0, 1] and the noise-free response.
x = np.linspace(0, 1, num=size)
true_regression_line = true_slope * x + true_intercept

# Observed response: the true line plus zero-mean Gaussian noise (sd 0.5).
y = true_regression_line + np.random.normal(loc=0.0, scale=0.5, size=size)

# Keep both a plain dict (passed to the GLM) and a DataFrame (for plotting).
data = {'x': x, 'y': y}
df = pd.DataFrame(data)
df.head()
Out[2]:
In [3]:
# Show the noisy samples as 'x' markers together with the noise-free line
# they were generated from.
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Generated data and underlying model')
ax.plot(x, y, marker='x', linestyle='None', label='sampled data')
ax.plot(x, true_regression_line, linewidth=2.0, label='true regression line')
ax.legend(loc='best');
In [4]:
# Scatter y against x with seaborn's fitted regression line.
# x and y are passed by keyword: seaborn releases that make every lmplot
# argument after `data` keyword-only reject the positional form, while the
# keyword form is accepted by older releases as well.
sns.lmplot(x='x', y='y', data=df)
Out[4]:
In [5]:
with Model() as model:
    # Specify the GLM and pass in the data. The resulting linear model, its
    # likelihood and all its parameters are automatically added to our model.
    glm.GLM.from_formula('y ~ x', data)
    # Sampling must happen inside the model context as well.
    trace = sample(3000, cores=2)  # draw 3000 posterior samples using NUTS sampling
In [6]:
# Trace plots of the posterior samples with the first 100 draws dropped.
# The figure size is handed to traceplot itself: traceplot builds its own
# figure, so calling plt.figure(figsize=...) beforehand only leaves an empty
# 7x7 figure behind and does not size the trace plots.
traceplot(trace[100:], figsize=(7, 7))
plt.tight_layout();
In [7]:
# Draw the data, then 100 regression lines taken from the posterior trace,
# then the true line, all on one 7x7 figure.
plt.figure(figsize=(7, 7))
plt.plot(x, y, marker='x', linestyle='None', label='data')
plot_posterior_predictive_glm(
    trace,
    samples=100,
    label='posterior predictive regression lines',
)
plt.plot(x, true_regression_line, label='true regression line', linewidth=3.0, color='y')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Posterior predictive regression lines')
plt.legend(loc='best');
In [8]:
# Load the CSV (first column as the index) and copy the `math` and
# `num_awards` columns to `x` and `y`, the names used by the model formula.
df = pd.read_csv(
    'http://stats.idre.ucla.edu/stat/data/poisson_sim.csv',
    index_col=0,
)
df = df.assign(x=df['math'], y=df['num_awards'])
df.head()
Out[8]:
In [9]:
# Scatter plot of the raw columns: num_awards against math.
df.plot.scatter(x='math', y='num_awards')
Out[9]:
In [10]:
with Model() as model:
    # Specify the GLM and pass in the data. The resulting linear model, its
    # likelihood and all its parameters are automatically added to our model.
    # No `family` is given here, so the GLM's default likelihood is used.
    glm.GLM.from_formula('y ~ x', df)
    # Sampling must happen inside the model context as well.
    trace = sample(3000, cores=2)  # draw 3000 posterior samples using NUTS sampling
In [11]:
# Trace plots of the posterior samples with the first 100 draws dropped.
# The figure size is handed to traceplot itself: traceplot builds its own
# figure, so calling plt.figure(figsize=...) beforehand only leaves an empty
# 7x7 figure behind and does not size the trace plots.
traceplot(trace[100:], figsize=(7, 7))
plt.tight_layout();
In [12]:
# Scatter the data on a 7x7 axes, then overlay 100 regression lines taken
# from the trace, evaluated on 100 evenly spaced x values in [0, 80].
fig, ax = plt.subplots(figsize=(7, 7))
df.plot.scatter(x='x', y='y', ax=ax)
plot_posterior_predictive_glm(
    trace,
    eval=np.linspace(0, 80, num=100),
    samples=100,
)
In [13]:
with Model() as model:
    # Specify the GLM and pass in the data. The resulting linear model, its
    # likelihood and all its parameters are automatically added to our model.
    # This time the likelihood is a negative binomial instead of the default.
    glm.GLM.from_formula('y ~ x', df, family=glm.families.NegativeBinomial())
    # The step method is created inside the context so it is bound to this model.
    step = NUTS()
    trace = sample(3000, cores=2, step=step)  # draw 3000 posterior samples using NUTS sampling
In [14]:
# Trace plots of the posterior samples with the first 100 draws dropped.
# The figure size is handed to traceplot itself: traceplot builds its own
# figure, so calling plt.figure(figsize=...) beforehand only leaves an empty
# 7x7 figure behind and does not size the trace plots.
traceplot(trace[100:], figsize=(7, 7))
plt.tight_layout();
In [15]:
# Autocorrelation plot of the whole trace (no initial draws are dropped here).
autocorrplot(trace);
In [16]:
# Scatter the data on a 7x7 axes, then overlay 100 posterior mean curves
# evaluated on 100 evenly spaced x values in [0, 80].
fig, ax = plt.subplots(figsize=(7, 7))
df.plot(kind='scatter', x='x', y='y', ax=ax)
# plot_posterior_predictive_glm draws the raw linear predictor
# (Intercept + x * slope) unless `lm` is given. The negative binomial family
# relates that predictor to the mean through exp, so without the explicit
# `lm` below the curves would be on the log scale and not comparable to the
# plotted counts.
plot_posterior_predictive_glm(
    trace,
    eval=np.linspace(0, 80, 100),
    lm=lambda xs, draw: np.exp(draw['Intercept'] + draw['x'] * xs),
    samples=100,
)
In [ ]: