In [1]:
    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import theano.tensor as tt
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
from matplotlib import gridspec
from IPython.display import Image
%matplotlib inline
plt.style.use('seaborn-white')
color = '#87ceeb'
f_dict = {'size':14}
    
In [2]:
    
def gammaShRaFromModeSD(mode, sd):
    """Calculate Gamma shape and rate from mode and sd."""
    rate = (mode + np.sqrt( mode**2 + 4 * sd**2 ) ) / ( 2 * sd**2 )
    shape = 1 + mode * rate
    return(shape, rate)
    
In [3]:
    
df = pd.read_csv('data/HairEyeColor.csv', dtype={'Hair':'category', 'Eye':'category'})
df.info()
    
    
In [4]:
    
df['Prop'] = df.Count.apply(lambda x: x/df.Count.sum())
df.head()
    
    Out[4]:
In [5]:
    
df_pivot = df.pivot('Eye', 'Hair')
df_pivot
    
    Out[5]:
In [6]:
    
Image('images/fig24_2.png')
    
    Out[6]:
In [7]:
    
y = df.Count
x1 = df.Eye.cat.codes.values
x2 = df.Hair.cat.codes.values
Nx1Lvl = len(df.Eye.cat.categories)
Nx2Lvl = len(df.Hair.cat.categories)
Ncell = df.Count.size
yLogMean = np.log(np.sum(y)/(Nx1Lvl*Nx2Lvl))
yLogSD = np.log(np.std(np.r_[np.repeat([0], Ncell-1), np.sum(y)], ddof=1))
agammaShRa = gammaShRaFromModeSD(yLogSD, 2*yLogSD)
with pm.Model() as poisson_model:
        a0 = pm.Normal('a0', yLogMean, tau=1/(yLogSD*2)**2)
        a1SD = pm.Gamma('a1SD', agammaShRa[0], agammaShRa[1])
        a1 = pm.Normal('a1', 0.0, tau=1/a1SD**2, shape=Nx1Lvl)
        
        a2SD = pm.Gamma('a2SD', agammaShRa[0], agammaShRa[1])
        a2 = pm.Normal('a2', 0.0, tau=1/a2SD**2, shape=Nx2Lvl)
        
        a1a2SD = pm.Gamma('a1a2SD', agammaShRa[0], agammaShRa[1])
        a1a2 = pm.Normal('a1a2', 0.0, 1/a1a2SD**2, shape=(Nx1Lvl, Nx2Lvl))
        
        lmbda = pm.math.exp(a0 + a1[x1] + a2[x2] + a1a2[x1, x2])
        
        like = pm.Poisson('y', lmbda, observed=y)
        
pm.model_to_graphviz(poisson_model)
    
    Out[7]:
In [8]:
    
n_samples = 3000
with poisson_model:
    trace1 = pm.sample(n_samples, cores=4, nuts_kwargs={'target_accept': 0.95})
    
    
In [9]:
    
pm.traceplot(trace1);
    
    
In [10]:
    
# Transforming the trace data to sum-to-zero values
m = np.zeros((Nx1Lvl,Nx2Lvl, trace1.nchains*n_samples))
b1b2 = m.copy()
for (j1,j2) in np.ndindex(Nx1Lvl,Nx2Lvl):
        m[j1,j2] =  (trace1['a0'] +
                     trace1['a1'][:,j1] +
                     trace1['a2'][:,j2] +
                     trace1['a1a2'][:,j1,j2])
b0 = np.mean(m, axis=(0,1))
b1 = np.mean(m, axis=1) - b0
b2 = np.mean(m, axis=0) - b0
        
for (j1,j2) in np.ndindex(Nx1Lvl,Nx2Lvl):
        b1b2[j1,j2] =  (m[j1,j2] - (b0 + b1[j1] + b2[j2]))
# Compute predicted proportions
expm = np.exp(m)
ppx1x2p = expm/np.sum(expm, axis=(0,1))
    
In [11]:
    
# Define gridspec
fig, axes = plt.subplots(4,4, figsize=(18,12))
for (r, c), ax in np.ndenumerate(axes):
    ax = pm.plot_posterior(ppx1x2p[r,c,:], point_estimate='mode', color=color, ax=ax)
    ax.scatter(df_pivot['Prop'].iloc[r,c], 0, s=60, c='r', marker='^', zorder=5)
    ax.set_title('Eye:{}  Hair:{},\nN={}'.format(df_pivot.index[r],
                                                df_pivot['Count'].columns[c],
                                                df_pivot['Count'].iloc[r,c]),
                 fontdict=f_dict)
    ax.set_xlim(left=.0, right=.25)
    ax.set_xlabel('Proportion')
fig.tight_layout(pad=1.7);
    
    
In [12]:
    
fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(15,3))
blue_black = b1b2[0,0]
brown_black = b1b2[1,0]
blue_blond = b1b2[0,1]
brown_blond = b1b2[1,1]
# [1] Deflection difference between blue and brown eyes, for black hair
pm.plot_posterior(blue_black - brown_black, point_estimate='mode', color=color, ax=ax1)
ax1.set_title('Blue - Brown @ Black', fontdict=f_dict)
ax1.set_xlim(-2,0)
# [2] Deflection difference between blue and brown eyes, for blond hair
pm.plot_posterior(blue_blond - brown_blond, point_estimate='mode', color=color, ax=ax2)
ax2.set_title('Blue - Brown @ Blond', fontdict=f_dict)
ax2.set_xlim(0,3.5)
for ax in [ax1, ax2]:
    ax.set_xlabel('Beta Deflect. Diff.', fontdict=f_dict)
# [3] Difference of differences: [1] - [2]
pm.plot_posterior((blue_black - brown_black) - (blue_blond - brown_blond),
                  point_estimate='mode', color=color, ax=ax3)
ax3.set_title('Blue.v.Brown \n (x) \n Black.v.Blond', fontdict=f_dict)
ax3.set_xlim(-5,0)
ax3.set_xlabel('Beta Deflect. Diff. of Diff.');