Code

Date: February 2017


In [1]:
%matplotlib inline

import numpy as np
import scipy as sp
import scipy.optimize                   # makes sp.optimize.bisect available below
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# For linear regression
from scipy.stats import multivariate_normal
from scipy.integrate import dblquad

# Shut down warnings for nicer output
import warnings
warnings.filterwarnings('ignore')

colors = sns.color_palette()
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

Coin tossing example


In [2]:
#===================================================
# FUNCTIONS
#===================================================

def relative_entropy(theta0, a):
    return theta0 * np.log(theta0/a) + (1 - theta0) * np.log((1 - theta0)/(1 - a))

def quadratic_loss(theta0, a):
    return (a - theta0)**2

def loss_distribution(l, dr, loss, true_dist, theta0, y_grid):
    """
    Uses the formula for the change of discrete random variable. It takes care of the 
    fact that relative entropy is not monotone.
    """
    eps = 1e-16
    if loss == 'relative_entropy':
        a1 = sp.optimize.bisect(lambda a: relative_entropy(theta0, a) - l, a = eps, b = theta0)
        a2 = sp.optimize.bisect(lambda a: relative_entropy(theta0, a) - l, a = theta0, b = 1 - eps)
    elif loss == 'quadratic':
        a1 = theta0 - np.sqrt(l)
        a2 = theta0 + np.sqrt(l)
        
    if np.isclose(a1, dr).any():
        y1 = y_grid[np.isclose(a1, dr)][0]
        prob1 = true_dist.pmf(y1)
    else:
        prob1 = 0.0

    if np.isclose(a2, dr).any():
        y2 = y_grid[np.isclose(a2, dr)][0]
        prob2 = true_dist.pmf(y2)
    else:
        prob2 = 0.0
    
    if np.isclose(a1, a2): 
        # near zero loss the two roots coincide, so count the probability only once
        return prob1
    else:
        return prob1 + prob2
    
def risk_quadratic(theta0, n, alpha=0, beta=0):
    """
    See Casella and Berger, p.332
    """
    first_term = n * theta0 * (1 - theta0)/(alpha + beta + n)**2
    second_term = ((n * theta0 + alpha)/(alpha + beta + n) - theta0)**2
    
    return first_term + second_term

def loss_figures(theta0, n, alpha, beta, mle=True, entropy=True):
    
    true_dist = stats.binom(n, theta0)

    y_grid = np.arange(n + 1)                       # sum of ones in a sample
    a_grid = np.linspace(0, 1, 100)                 # action space represented as [0, 1]
    
    # The two decision functions (as a function of Y)
    decision_rule = y_grid/n
    decision_rule_bayes = (y_grid + alpha)/(n + alpha + beta) 

    if mle and entropy:
        """
        MLE with relative entropy loss
        """        
        loss = relative_entropy(theta0, decision_rule)
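        # a = 0 and a = 1 carry infinite entropy loss, so the two endpoint
        # probabilities are attached directly below instead of being inverted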
        loss_dist = np.asarray([loss_distribution(i, decision_rule, "relative_entropy", 
                                                  true_dist, theta0, 
                                                  y_grid) for i in loss[1:-1]])
        loss_dist = np.hstack([true_dist.pmf(y_grid[0]), loss_dist, true_dist.pmf(y_grid[-1])])
        risk = loss @ loss_dist

    elif mle and not entropy:
        """
        MLE with quadratic loss
        """        
        loss = quadratic_loss(theta0, decision_rule)
        loss_dist = np.asarray([loss_distribution(i, decision_rule, "quadratic",
                                                  true_dist, theta0, 
                                                  y_grid) for i in loss])

        risk = risk_quadratic(theta0, n)

        
    elif not mle and entropy:
        """
        Bayes with relative entropy loss
        """        
        loss = relative_entropy(theta0, decision_rule_bayes)
        loss_dist = np.asarray([loss_distribution(i, decision_rule_bayes, "relative_entropy",
                                                  true_dist, theta0, y_grid) for i in loss])
        risk = loss @ loss_dist

    
    elif not mle and not entropy:
        """
        Bayes with quadratic loss
        """        
        loss = quadratic_loss(theta0, decision_rule_bayes)
        loss_dist = np.asarray([loss_distribution(i, decision_rule_bayes, "quadratic",
                                                  true_dist, theta0, y_grid) for i in loss])
        risk = risk_quadratic(theta0, n, alpha, beta)

    return loss, loss_dist, risk
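
For reference, these are the formulas the functions above implement (the subscripts e and q are just labels for the entropy and quadratic cases; the risk formula is the bias-variance decomposition cited from Casella and Berger for the estimator $(Y+\alpha)/(n+\alpha+\beta)$ with $Y\sim\mathrm{Binomial}(n,\theta_0)$, the MLE being the case $\alpha=\beta=0$):

$$
L_e(\theta_0, a) = \theta_0 \log\frac{\theta_0}{a} + (1-\theta_0)\log\frac{1-\theta_0}{1-a},
\qquad
L_q(\theta_0, a) = (a - \theta_0)^2
$$

$$
R_q(\theta_0) = \mathbb{E}\left[\left(\frac{Y+\alpha}{n+\alpha+\beta} - \theta_0\right)^{2}\right]
= \underbrace{\frac{n\,\theta_0 (1-\theta_0)}{(\alpha+\beta+n)^2}}_{\text{variance}}
+ \underbrace{\left(\frac{n\,\theta_0+\alpha}{\alpha+\beta+n} - \theta_0\right)^{2}}_{\text{squared bias}}
$$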

In [3]:
theta0 = .79
n = 25
alpha, beta = 7, 2

#=========================
# Elements of Figure 1
#=========================
true_dist = stats.binom(n, theta0)

y_grid = np.arange(n + 1)                       # sum of ones in a sample
a_grid = np.linspace(0, 1, 100)                 # action space represented as [0, 1]
rel_ent = relative_entropy(theta0, a_grid)      # form of the loss function
quadratic = quadratic_loss(theta0, a_grid)      # form of the loss function

#=========================
# Elements of Figure 2
#=========================
theta0_alt = .39
true_dist_alt = stats.binom(n, theta0_alt)

# The two decision functions (as a function of Y)
decision_rule = y_grid/n
decision_rule_bayes = (y_grid + alpha)/(n + alpha + beta) 

#=========================
# Elements of Figure 3
#=========================
loss_re_mle, loss_dist_re_mle, risk_re_mle = loss_figures(theta0, n, alpha, beta)
loss_quad_mle, loss_dist_quad_mle, risk_quad_mle = loss_figures(theta0, n, alpha, beta, 
                                                                entropy=False)
loss_re_bayes, loss_dist_re_bayes, risk_re_bayes = loss_figures(theta0, n, alpha, beta, 
                                                                mle=False)
loss_quad_bayes, loss_dist_quad_bayes, risk_quad_bayes = loss_figures(theta0, n, alpha, beta, 
                                                                      mle=False, entropy=False)


loss_re_mle_alt, loss_dist_re_mle_alt, risk_re_mle_alt = loss_figures(theta0_alt, 
                                                                      n, alpha, beta)
loss_quad_mle_alt, loss_dist_quad_mle_alt, risk_quad_mle_alt = loss_figures(theta0_alt, n, 
                                                                            alpha, beta, entropy=False)
loss_re_bayes_alt, loss_dist_re_bayes_alt, risk_re_bayes_alt = loss_figures(theta0_alt, n, 
                                                                            alpha, beta, mle=False)
loss_quad_bayes_alt, loss_dist_quad_bayes_alt, risk_quad_bayes_alt = loss_figures(theta0_alt, 
                                                                                  n, alpha, beta, 
                                                                                  mle=False, entropy=False)
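
As a quick sanity check on the change-of-variable logic (a minimal sketch, assuming the cells above have been run), each induced loss distribution should place total probability of approximately one on its loss values; in the entropy/MLE case this includes the two endpoint terms added separately.

In [ ]:
# Sanity check: each induced loss distribution should sum to (approximately) one
for name, dist in [('MLE, entropy', loss_dist_re_mle),
                   ('MLE, quadratic', loss_dist_quad_mle),
                   ('Bayes, entropy', loss_dist_re_bayes),
                   ('Bayes, quadratic', loss_dist_quad_bayes)]:
    print('{:<18s} total probability: {:.6f}'.format(name, dist.sum()))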

In [4]:
fig, ax = plt.subplots(1, 2, figsize = (12, 4))

ax[0].set_title('True distribution over Y', fontsize = 14)
ax[0].plot(y_grid, true_dist.pmf(y_grid), 'o', color = sns.color_palette()[3])
ax[0].vlines(y_grid, 0, true_dist.pmf(y_grid), lw = 4, color = sns.color_palette()[3], alpha = .7)
ax[0].set_xlabel(r'Number of ones in the sample', fontsize = 12)

ax[1].set_title('Loss functions over the action space', fontsize = 14)
ax[1].plot(a_grid, rel_ent, lw = 2, label = 'relative entropy loss')
ax[1].plot(a_grid, quadratic, lw = 2, label = 'quadratic loss')
ax[1].axvline(theta0, color = sns.color_palette()[2], lw = 2, label = r'$\theta_0=${t}'.format(t=theta0))
ax[1].legend(loc = 'best', fontsize = 12)
ax[1].set_xlabel(r'Actions $(a)$', fontsize = 12)
plt.tight_layout()

plt.savefig("./example1_fig1.png", dpi=800)



In [5]:
fig, ax = plt.subplots(1, 2, figsize = (12, 4))

ax[0].set_title('Induced action distribution of the MLE estimator', fontsize = 14)
# Small bias
ax[0].plot(decision_rule, true_dist.pmf(y_grid), 'o')
ax[0].vlines(decision_rule, 0, true_dist.pmf(y_grid), lw = 5, alpha = .9, color = sns.color_palette()[0])
ax[0].axvline(theta0, color = sns.color_palette()[2], lw = 2, label = r'$\theta_0=${t}'.format(t=theta0))
# Large bias for Bayes
ax[0].plot(decision_rule, true_dist_alt.pmf(y_grid), 'o', color = sns.color_palette()[0], alpha = .4)
ax[0].vlines(decision_rule, 0, true_dist_alt.pmf(y_grid), lw = 4, alpha = .4, color = sns.color_palette()[0])
ax[0].axvline(theta0_alt, color = sns.color_palette()[2], lw = 2, alpha = .4, 
              label = r'$\theta=${t}'.format(t=theta0_alt))
ax[0].legend(loc = 'best', fontsize = 12)
ax[0].set_ylim([0, .2])
ax[0].set_xlim([0, 1])
ax[0].set_xlabel(r'Actions $(a)$', fontsize = 12)

ax[1].set_title('Induced action distribution of the Bayes estimator', fontsize = 14)
# Small bias
ax[1].plot(decision_rule_bayes, true_dist.pmf(y_grid), 'o', color = sns.color_palette()[1])
ax[1].vlines(decision_rule_bayes, 0, true_dist.pmf(y_grid), lw = 5, alpha = .9, 
                color = sns.color_palette()[1])
ax[1].axvline(theta0, color = sns.color_palette()[2], lw = 2, label = r'$\theta_0=${t}'.format(t=theta0))
# Large bias for Bayes
ax[1].plot(decision_rule_bayes, true_dist_alt.pmf(y_grid), 'o', color = sns.color_palette()[1], alpha = .4)
ax[1].vlines(decision_rule_bayes, 0, true_dist_alt.pmf(y_grid), lw = 4, alpha = .4, 
                color = sns.color_palette()[1])
ax[1].axvline(theta0_alt, color = sns.color_palette()[2], lw = 2, alpha = .4, 
              label = r'$\theta=${t}'.format(t=theta0_alt))
ax[1].legend(loc = 'best', fontsize = 12)
ax[1].set_ylim([0, .2])
ax[1].set_xlim([0, 1])
ax[1].set_xlabel(r'Actions $(a)$', fontsize = 12)

plt.tight_layout()
plt.savefig("./example1_fig2.png", dpi=800)



In [6]:
fig, ax = plt.subplots(2, 2, figsize = (12, 6))

ax[0, 0].set_title('Induced entropy loss distribution (MLE estimator)', fontsize = 14)
ax[0, 0].vlines(loss_re_mle, 0, loss_dist_re_mle, lw = 9, alpha = .9, color = sns.color_palette()[0]) 
ax[0, 0].axvline(risk_re_mle, lw = 3, linestyle = '--',
                     color = sns.color_palette()[0], label = r"Entropy risk ($\theta_0={t}$)".format(t=theta0))
ax[0, 0].vlines(loss_re_mle_alt, 0, loss_dist_re_mle_alt, lw = 9, alpha = .3, color = sns.color_palette()[0]) 
ax[0, 0].axvline(risk_re_mle_alt, lw = 3, linestyle = '--', alpha = .4,
                     color = sns.color_palette()[0], label = r"Entropy risk ($\theta={t}$)".format(t=theta0_alt))

ax[0, 0].set_xlim([0, .1])
ax[0, 0].set_ylim([0, .2])
ax[0, 0].set_xlabel('Loss', fontsize=12)
ax[0, 0].legend(loc = 'best', fontsize = 12)
    
ax[1, 0].set_title('Induced entropy loss distribution (Bayes estimator)', fontsize=14)
ax[1, 0].vlines(loss_re_bayes, 0, loss_dist_re_bayes, lw=9, alpha=.9, color=sns.color_palette()[1]) 
ax[1, 0].axvline(risk_re_bayes, lw=3, linestyle='--',
                     color = sns.color_palette()[1], label=r"Entropy risk ($\theta_0={t}$)".format(t=theta0))
ax[1, 0].vlines(loss_re_bayes_alt, 0, loss_dist_re_bayes_alt, lw=9, alpha=.3, color=sns.color_palette()[1]) 
ax[1, 0].axvline(risk_re_bayes_alt, lw=3, linestyle='--', alpha=.4, color=sns.color_palette()[1], 
                 label=r"Entropy risk ($\theta={t}$)".format(t=theta0_alt))
ax[1, 0].set_xlim([0, .1])
ax[1, 0].set_ylim([0, .2])
ax[1, 0].set_xlabel('Loss')
ax[1, 0].legend(loc='best', fontsize=12)

ax[0, 1].set_title('Induced quadratic loss distribution (MLE estimator)', fontsize=14)
ax[0, 1].vlines(loss_quad_mle, 0, loss_dist_quad_mle, lw=9, alpha=.9, color=sns.color_palette()[0]) 
ax[0, 1].axvline(risk_quad_mle, lw=3, linestyle='--', 
                     color = sns.color_palette()[0], label=r"Quadratic risk ($\theta_0={t}$)".format(t=theta0))
ax[0, 1].vlines(loss_quad_mle_alt, 0, loss_dist_quad_mle_alt, lw=9, alpha=.3, color=sns.color_palette()[0]) 
ax[0, 1].axvline(risk_quad_mle_alt, lw=3, linestyle='--', alpha=.4,
                     color=sns.color_palette()[0], label=r"Quadratic risk ($\theta={t}$)".format(t=theta0_alt))
ax[0, 1].set_xlim([0, .05])
ax[0, 1].set_ylim([0, .2])
ax[0, 1].set_xlabel('Loss', fontsize=12)
ax[0, 1].legend(loc='best', fontsize=12)
    
ax[1, 1].set_title('Induced quadratic loss distribution (Bayes estimator)', fontsize=14)
ax[1, 1].vlines(loss_quad_bayes, 0, loss_dist_quad_bayes, lw=9, alpha=.9, color=sns.color_palette()[1]) 
ax[1, 1].axvline(risk_quad_bayes, lw=3, linestyle='--', 
                     color = sns.color_palette()[1], label=r"Quadratic risk ($\theta_0={t}$)".format(t=theta0))
ax[1, 1].vlines(loss_quad_bayes_alt, 0, loss_dist_quad_bayes_alt, lw=9, alpha=.3, color=sns.color_palette()[1]) 
ax[1, 1].axvline(risk_quad_bayes_alt, lw=3, linestyle = '--', alpha=.4,
                     color=sns.color_palette()[1], label=r"Quadratic risk ($\theta={t}$)".format(t=theta0_alt))
ax[1, 1].set_xlim([0, .05])
ax[1, 1].set_ylim([0, .2])
ax[1, 1].set_xlabel('Loss', fontsize=12)
ax[1, 1].legend(loc='best', fontsize=12)

plt.tight_layout()
plt.savefig("./example1_fig3.png", dpi=800)


Bayes OLS example


In [7]:
mu = np.array([1, 3])                    # mean
sigma = np.array([[4, 1], [1, 8]])       # covariance matrix
n = 50                                   # sample size

# Bayes priors
mu_bayes = np.array([2, 2])
precis_bayes = np.array([[6, -3], [-3, 6]])

# joint normal rv for (Y,X)
mvnorm = multivariate_normal(mu, sigma)

# decision rule -- OLS estimator
def d_OLS(Z, n):
    Y = Z[:, 0]
    X = np.stack((np.ones(n), Z[:,1]), axis=-1)
    return np.linalg.inv(X.T @ X) @ X.T @ Y

# decision rule -- Bayes
def d_bayes(Z, n):
    Y = Z[:, 0]
    X = np.stack((np.ones(n), Z[:,1]), axis=-1)
    return np.linalg.inv(X.T @ X + precis_bayes) @ (precis_bayes @ mu_bayes + X.T @ Y)

# loss -- define integrand
def loss_int(y, x, b):
    '''Quadratic loss integrand, weighted by the joint normal density of (Y, X).'''
    return (y - b[0] - b[1]*x)**2*mvnorm.pdf((y,x))

# simulate distribution over actions and over losses
B_OLS = []
L_OLS = []
B_bayes = []
L_bayes = []

for i in range(1000):
    # generate sample
    Z = mvnorm.rvs(n)
    
    # get OLS action corresponding to the realized sample
    b_OLS = d_OLS(Z, n)
    
    # get Bayes action
    b_bayes = d_bayes(Z, n)
    
    # loss of each action: expected squared prediction error under the true (Y, X) distribution
    l_OLS = dblquad(loss_int, -np.inf, np.inf, lambda x: -np.inf, lambda x: np.inf, args=(b_OLS,))
    l_bayes = dblquad(loss_int, -np.inf, np.inf, lambda x: -np.inf, lambda x: np.inf, args=(b_bayes,))
    
    # record action
    B_OLS.append(b_OLS)
    B_bayes.append(b_bayes)
    
    # record loss
    L_OLS.append(l_OLS)
    L_bayes.append(l_bayes)

# keep only the integral value; dblquad returns (value, abserr)
L_OLS = np.array(L_OLS)[:, 0]
L_bayes = np.array(L_bayes)[:, 0]
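
For reference, a sketch of the formulas the two decision rules above implement, writing $\Lambda$ for `precis_bayes` and $\mu_b$ for `mu_bayes` (reading the Bayes rule as a conjugate-normal posterior mean assumes the regression error variance is normalized to one, which is my reading of the code rather than something stated above). The best-in-class action used below is the population linear projection of $Y$ on $X$:

$$
b_{OLS} = (X'X)^{-1}X'Y,
\qquad
b_{Bayes} = (X'X + \Lambda)^{-1}(\Lambda \mu_b + X'Y)
$$

$$
\beta_1^{*} = \frac{\sigma_{YX}}{\sigma_{XX}} = \frac{1}{8} = 0.125,
\qquad
\beta_0^{*} = \mu_Y - \beta_1^{*}\mu_X = 1 - 0.125 \cdot 3 = 0.625
$$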

In [49]:
B_OLS = pd.DataFrame(B_OLS, columns=["$\\beta_0$", "$\\beta_1$"])
B_bayes = pd.DataFrame(B_bayes, columns=["$\\beta_0$", "$\\beta_1$"])

g1 = sns.jointplot(x = "$\\beta_0$", y = "$\\beta_1$", data=B_OLS, kind="kde", 
                   space=0.3, color = sns.color_palette()[0], size=5, xlim = (-.5, 1.6), ylim = (-.1, .4))
g1.ax_joint.plot([mu[0] - sigma[0,1]/sigma[1,1]*mu[1]],[sigma[0,1]/sigma[1,1]], 'ro', color='r', label='best-in-class')
g1.set_axis_labels(r'$\beta_0$', r'$\beta_1$', fontsize=14)
g1.fig.suptitle('Induced action distribution -- OLS', fontsize=14, y=1.04)
plt.savefig("./example2_fig1a.png", dpi=800)

g2 = sns.jointplot(x = "$\\beta_0$", y = "$\\beta_1$", data=B_bayes, kind="kde", 
                   space=0.3, color = sns.color_palette()[0], size=5, xlim = (-.5, 1.6), ylim = (-.1, .4))
g2.ax_joint.plot([mu[0] - sigma[0,1]/sigma[1,1]*mu[1]],[sigma[0,1]/sigma[1,1]], 'ro', color='r', label='best-in-class')
g2.set_axis_labels(r'$\beta_0$', r'$\beta_1$', fontsize=14)
g2.fig.suptitle('Induced action distribution -- Bayes', fontsize=14, y=1.04)
plt.savefig("./example2_fig1b.png", dpi=800)


plt.show()



In [28]:
b_best = [mu[0] - sigma[0,1]/sigma[1,1]*mu[1], sigma[0,1]/sigma[1,1]]
l_best = dblquad(loss_int, -np.inf, np.inf, lambda x: -np.inf, lambda x: np.inf, args=(b_best,))
print(l_best[0])


3.8749999999509477
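
This matches the closed-form value: for jointly normal $(Y, X)$ the best-in-class loss is the residual variance of the linear projection of $Y$ on $X$,

$$
\min_{b}\, \mathbb{E}\big[(Y - b_0 - b_1 X)^2\big]
= \sigma_{YY} - \frac{\sigma_{YX}^2}{\sigma_{XX}} = 4 - \frac{1}{8} = 3.875 .
$$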

In [50]:
plt.figure(figsize=(11, 5))
plt.axvline(x=l_best[0], ymin=0, ymax=1, linewidth=3, color = colors[4], label='Best-in-class loss')
plt.axvline(x=L_OLS.mean(), ymin=0, ymax=1, linewidth=3, color = colors[2], label='Risk of OLS')
plt.axvline(x=L_bayes.mean(), ymin=0, ymax=1, linewidth=3, color = colors[3], label='Risk of Bayes')
sns.distplot(L_OLS, bins=50, kde=False, color = colors[0], label='OLS')
sns.distplot(L_bayes, bins=50, kde=False, color = colors[1], label='Bayes')
plt.title('Induced loss distribution', fontsize = 14, y=1.02)
plt.legend(fontsize=12)
plt.xlabel('Loss', fontsize=12)
plt.xlim([3.8, 4.5])
plt.tight_layout()

plt.savefig("./example2_fig2.png", dpi=800)



In [44]:
beta_0 = mu[0] - sigma[0,1]/sigma[1,1]*mu[1]
beta_1 = sigma[0,1]/sigma[1,1]

print('Bias of OLS')
print('==========================')
print('{:.4f} - {:.4f} = {:.4f}'.format(beta_0, B_OLS.mean()[0], beta_0 - B_OLS.mean()[0]))
print('{:.4f} - {:.4f} = {:.4f}\n\n'.format(beta_1, B_OLS.mean()[1], beta_1 - B_OLS.mean()[1]))

print('Bias of Bayes')
print('==========================')
print('{:.4f} - {:.4f} = {:.4f}'.format(beta_0, B_bayes.mean()[0], beta_0 - B_bayes.mean()[0]))
print('{:.4f} - {:.4f} = {:.4f}'.format(beta_1, B_bayes.mean()[1], beta_1 - B_bayes.mean()[1]))


Bias of OLS
==========================
0.6250 - 0.6011 = 0.0239
0.1250 - 0.1292 = -0.0042


Bias of Bayes
==========================
0.6250 - 0.6530 = -0.0280
0.1250 - 0.1287 = -0.0037

In [45]:
print('Variance of OLS')
print('======================')
print(B_OLS.var())

print('\n\nVariance of Bayes')
print('======================')
print(B_bayes.var())


Variance of OLS
======================
$\beta_0$    0.164572
$\beta_1$    0.010741
dtype: float64


Variance of Bayes
======================
$\beta_0$    0.093559
$\beta_1$    0.007761
dtype: float64

In [46]:
print('Risk of OLS:   {:.4f} \nRisk of Bayes: {:.4f}'.format(L_OLS.mean(), L_bayes.mean()))


Risk of OLS:   4.0339 
Risk of Bayes: 3.9999

In [ ]: