In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import scipy.stats as stats
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch
import warnings
warnings.filterwarnings('ignore')
colors = sns.color_palette("Blues")
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
Coin-tossing MLE consistency
In [2]:
# True coin bias and the grid of sample sizes for the consistency figure.
theta0 = .7
N = 5000
sample_sizes = np.arange(5, N, 10)
# Quantiles of the MLE (the sample frequency) under Binomial(n, theta0):
# binom.ppf(q) / n is the q-quantile of the estimator at sample size n.
perc5 = [stats.binom(n, theta0).ppf(.05)/n for n in sample_sizes]
perc25 = [stats.binom(n, theta0).ppf(.2)/n for n in sample_sizes]   # NOTE: 20% quantile despite the name
perc75 = [stats.binom(n, theta0).ppf(.8)/n for n in sample_sizes]   # NOTE: 80% quantile despite the name
perc95 = [stats.binom(n, theta0).ppf(.95)/n for n in sample_sizes]

fig, ax = plt.subplots(1, 2, figsize = (12, 4))
# FIX: corrected the typo "maximumum" -> "maximum" in the figure title.
fig.suptitle("Evolution of the maximum likelihood estimator's induced action distribution in the coin tossing example", y = 1.02, fontsize = 14)

small_sample = 200  # number of grid points shown in the zoomed-in left panel

# Left panel: small-sample behavior only.
# FIX: label strings made raw so '\%' is an explicit LaTeX escape rather
# than an invalid Python escape sequence.
ax[0].fill_between(sample_sizes[:small_sample], perc5[:small_sample], perc25[:small_sample],
                   color = colors[2], alpha = .5, label = r'$5\%$-$95\%$ percentiles')
ax[0].fill_between(sample_sizes[:small_sample], perc25[:small_sample], perc75[:small_sample],
                   color = colors[3], alpha = .5, label = r'$20\%$-$80\%$ percentiles')
ax[0].fill_between(sample_sizes[:small_sample], perc75[:small_sample], perc95[:small_sample],
                   color = colors[2], alpha = .5)
ax[0].axhline(theta0, color = colors[5], lw = 2, label = r'True $\theta_0$')
ax[0].set_xlim([10, 300])
ax[0].set_ylim([0.4, 1])
ax[0].set_xlabel('Sample size', fontsize = 12)
ax[0].legend(loc='best', fontsize = 12)

# Right panel: the full range of sample sizes.
ax[1].fill_between(sample_sizes, perc5, perc25, color = colors[2], alpha = .5, label = r'$5\%$-$95\%$ percentiles')
ax[1].fill_between(sample_sizes, perc25, perc75, color = colors[3], alpha = .5, label = r'$20\%$-$80\%$ percentiles')
ax[1].fill_between(sample_sizes, perc75, perc95, color = colors[2], alpha = .5)
ax[1].axhline(theta0, color = colors[5], lw = 2, label = r'True $\theta_0$')
ax[1].set_ylim([0.4, 1])
ax[1].set_xlabel('Sample size', fontsize = 12)
ax[1].legend(loc='best', fontsize = 12)
# FIX: removed a duplicated axhline call that redrew the theta_0 line.

plt.tight_layout()
plt.savefig('asymptotic_cointoss_consistency.png', format = 'png', dpi = 800, bbox_inches='tight')
plt.show()
Uniform LLN -- coin-tossing tail probabilities
In [3]:
def loss_cdf(l_value, aa, true_dist, upper_part):
    """
    Tail (or lower-tail) probability of the empirical quadratic loss at
    action `aa` under the frozen binomial distribution `true_dist`.

    Uses the change-of-variable formula for a discrete random variable,
    taking care of the fact that the quadratic loss

        L(theta, a) = theta*(1 - a)**2 + (1 - theta)*a**2

    is non-monotone in the empirical frequency. An empirical loss equal
    to `l_value` corresponds to the empirical frequency

        p_n = (l_value - a**2) / ((1 - a)**2 - a**2).
    """
    n, _ = true_dist.args
    # Empirical frequency implied by l_value at this action.
    implied_freq = (l_value - aa**2) / ((1 - aa)**2 - aa**2)
    # Corresponding number of successes out of n (truncated toward zero).
    successes = int(implied_freq * n)
    cdf_value = true_dist.cdf(successes)
    # The empirical loss is increasing in p_n when aa < .5 and decreasing
    # otherwise, which flips which binomial tail we must return.
    increasing = aa < .5
    if increasing == bool(upper_part):
        # (increasing & upper tail) or (decreasing & lower tail):
        # the event corresponds to the upper tail of the binomial.
        return 1 - cdf_value
    return cdf_value
def prob(delta, a, true_dist):
    """
    Probability that the empirical loss deviates from the true loss by
    more than delta at action `a`:

        P(|L_n - L| > delta) = P(L_n < L - delta) + P(L_n > L + delta)

    where L = L(theta0, a) is the true quadratic loss under `true_dist`.
    """
    _, theta0 = true_dist.args
    # True quadratic loss at action a.
    true_loss = theta0 * (1 - a)**2 + (1 - theta0) * a**2
    # Two one-sided tail probabilities of the empirical loss.
    right_tail = loss_cdf(true_loss + delta, a, true_dist, upper_part=True)
    left_tail = loss_cdf(true_loss - delta, a, true_dist, upper_part=False)
    return left_tail + right_tail
In [4]:
sample_size = np.arange(5, 1000, 2)
action_grid1 = [.01, .3, .4, .6]   # actions for the left panel
action_grid2 = [.99, .95, .9, .8]  # actions for the right panel
delta = .02
epsilon = .18

fig, ax = plt.subplots(1, 2, figsize = (12, 4))
fig.suptitle('Tail probabilities for the MLE estimator (evaluated with quadratic loss) in the coin tossing example \n' +
             r"$\delta = {:1.2f}$, $\theta_0 = {:1.2f}$, $\varepsilon = {:1.2f}$".format(delta, theta0, epsilon),
             fontsize = 14, y = 1.07)

# FIX: the two panels were drawn by copy-pasted loops; factored the shared
# plotting logic into a single helper.
def plot_tail_probs(axis, action_grid):
    """Plot P(|L_n - L| > delta) against n for each action in action_grid,
    marking with a vertical line the n at which it crosses epsilon."""
    for i, a in enumerate(action_grid):
        axis.plot(sample_size, [prob(delta, a, stats.binom(nn, theta0)) for nn in sample_size],
                  label = r'a = {:1.2f}'.format(a), color = colors[2 + i])
        # Sample size where the tail probability equals epsilon.
        # NOTE(review): bisect probes non-integer nn; this appears to rely
        # on stats.binom tolerating non-integer n -- confirm.
        n_star = sp.optimize.bisect(lambda nn: prob(delta, a, stats.binom(nn, theta0)) - epsilon, .1, 1000)
        axis.vlines(n_star, 0, epsilon, color = colors[2 + i], alpha = .9)
    axis.axhline(epsilon, linestyle='--', color='k', lw =1, alpha=.7)
    axis.legend(loc = 'best', fontsize = 12)
    axis.set_xlabel('Sample size', fontsize = 12)

plot_tail_probs(ax[0], action_grid1)
ax[0].set_ylabel(r'$\varepsilon$', fontsize = 14, rotation=0)
ax[0].yaxis.set_label_coords(-.09, .16)

plot_tail_probs(ax[1], action_grid2)

plt.tight_layout()
plt.savefig('asymptotic_cointoss_tail.png', format = 'png', dpi = 800, bbox_inches='tight')
plt.show()
Concentration inequalities
In [5]:
# Left panel: Markov's inequality -- the indicator 1{Z >= t} is dominated
# pointwise by Z/t for Z >= 0.
z = np.linspace(0, 10, 1000)
t = 3
f1 = (z >= t)   # indicator 1{Z >= t}; booleans plot as a 0/1 step
f2 = z / t      # dominating function Z/t
colors = sns.color_palette()
fig, ax = plt.subplots(1, 2, figsize = (12, 4), sharey = True)
# Coordinate axes drawn as arrows.
ax[0].add_artist(ConnectionPatch(xyA=(0, 0), xyB=(10, 0), coordsA="data", coordsB="data",
                                 arrowstyle="->", mutation_scale=10, lw = 2))
ax[0].add_artist(ConnectionPatch(xyA=(0, 0), xyB=(0, 3.5), coordsA="data", coordsB="data",
                                 arrowstyle="->", mutation_scale=10, lw = 2))
ax[0].plot(z, f1, color = colors[0], lw = 3, label = r'$\mathbf{1}\{Z \geq t\}$')
ax[0].plot(z, f2, color = colors[1], lw = 3, label = r'$\frac{Z}{t}$')
ax[0].set_xticklabels([])
ax[0].set_yticklabels([])
ax[0].text(9.3, -.25, r'$Z$', fontsize = 12)
ax[0].text(8, 3, r'$\frac{Z}{t}$', fontsize = 15)
# FIX: made this a raw string -- the original relied on invalid escape
# sequences like '\m' passing through unchanged.
ax[0].text(5, .7, r'$\mathbf{1}\{Z \geq t\}$', fontsize = 13)
ax[0].text(t - .05, -.25, r'$t$', fontsize = 12)
ax[0].text(0, -.25, r'$0$', fontsize = 12)
ax[0].text( -.6, .93, r'$1$', fontsize = 12)
ax[0].axhline(1, linestyle = '--', color = 'k', lw = 1)
ax[0].set_title('Logic behind Markov\'s inequality', fontsize=16)

# Right panel: Chebyshev's inequality -- 1{|Z - mu| >= t} is dominated by
# |Z - mu|^2 / t^2.
t = 2.5
mu = 2
z = np.linspace(mu - 6, mu + 6, 1000)
g1 = (z >= mu + t) + (z <= mu - t)  # indicator 1{|Z - mu| >= t}
g2 = ((z - mu) / t)**2              # dominating function |Z - mu|^2 / t^2
ax[1].add_artist(ConnectionPatch(xyA=(mu - 6, 0), xyB=(mu + 6, 0), coordsA="data", coordsB="data",
                                 arrowstyle="->", mutation_scale=10, lw = 2))
# FIX: the labels below were copied from the left panel and did not match
# the functions actually plotted here (legend is not shown, so the
# rendered figure is unchanged).
ax[1].plot(z, g1, color = colors[0], lw = 3, label = r'$\mathbf{1}\{|Z - \mu| \geq t\}$')
ax[1].plot(z, g2, color = colors[1], lw = 3, label = r'$\frac{|Z-\mu|^2}{t^2}$')
ax[1].set_xticklabels([])
ax[1].set_yticklabels([])
ax[1].text(mu + 5.3, -.25, r'$Z$', fontsize = 12)
ax[1].text(4.7, 2.8, r'$\frac{|Z-\mu|^2}{t^2}$', fontsize = 15)
ax[1].text(4.7, .7, r'$\mathbf{1}\{|Z -\mu |\geq t\}$', fontsize = 13)
ax[1].text(mu - .05, -.25, r'$\mu$', fontsize = 12)
ax[1].text(mu + t - .15, -.25, r'$\mu + t$', fontsize = 12)
ax[1].text(mu - t - .15, -.25, r'$\mu - t$', fontsize = 12)
ax[1].set_title('Logic behind Chebyshev\'s inequality', fontsize=16)
ax[1].set_ylim([0, 3.5])
plt.tight_layout()
plt.savefig('asymptotic_markov_chebyshev.png', format = 'png', dpi = 800, bbox_inches='tight')
plt.show()
Rademacher complexity -- coin-tossing
In [6]:
# Compute the Rademacher complexity for the coin tossing example.
n, k = 10, 6

def given_epsilon_path(n3, n4, n, k):
    """
    Maximum of three candidate deviation terms for one draw of the
    Rademacher signs, where (n3, n4) parameterize the sign split.

    With n1 = n - k - n3 and n2 = k - n4, the candidates are
    |n1 - n3| / n, |n2 - n4| / n, and a weighted combination of the two
    differences; the weighted term is 0 when the differences cancel.
    """
    gap1 = (n - k - n3) - n3   # n1 - n3
    gap2 = (k - n4) - n4       # n2 - n4
    total_gap = gap1 + gap2
    candidates = [abs(gap1 / n), abs(gap2 / n)]
    if total_gap:
        weighted = ((gap2 / total_gap)**2 * (gap1 / n)
                    + (gap1 / total_gap)**2 * (gap2 / n))
        candidates.append(abs(weighted))
    else:
        # Degenerate case: the two gaps cancel exactly.
        candidates.append(0)
    return max(candidates)
def empirical_rademacher(n, k):
    """
    Empirical Rademacher complexity of the coin tossing example: the
    expectation of given_epsilon_path over independent fair-coin draws
    of the sign counts (n4 out of k, n3 out of n - k).
    """
    # Joint pmf over (n4, n3): n4 ~ Binomial(k, 1/2) along rows,
    # n3 ~ Binomial(n - k, 1/2) along columns.
    pmf_n4 = stats.binom(k, .5).pmf(np.arange(k + 1)).reshape(k + 1, 1)
    pmf_n3 = stats.binom(n - k, .5).pmf(np.arange(n - k + 1)).reshape(1, n - k + 1)
    joint_pmf = pmf_n4 * pmf_n3
    # Expectation accumulated in the same (n4 outer, n3 inner) order.
    return sum(given_epsilon_path(n3, n4, n, k) * joint_pmf[n4, n3]
               for n4 in range(k + 1)
               for n3 in range(n - k + 1))
def rademacher_complexity(n):
    """
    Rademacher complexity at sample size n: the expectation of the
    empirical Rademacher complexity over outcomes of Binomial(n, theta0),
    using the global theta0.

    Returns (complexity, per_outcome) where per_outcome[i] is the
    empirical Rademacher complexity of a sample with i successes.
    """
    true_pmf = stats.binom(n, theta0).pmf(np.arange(n + 1)).reshape(n + 1, 1)
    per_outcome = np.zeros(n + 1)
    expectation = 0
    for successes in range(n + 1):
        per_outcome[successes] = empirical_rademacher(n, successes)
        # true_pmf rows are 1-element arrays, so expectation becomes one too.
        expectation += true_pmf[successes] * per_outcome[successes]
    return expectation[0], per_outcome
def tail_figure(sample_size, theta0, action, tail_prob):
    """
    Invert the tail probability: return the delta for which

        P(L(P_n, a) < L(P, a) - delta) + P(L(P_n, a) > L(P, a) + delta)

    equals tail_prob, for the given action, theta0 and sample_size.
    """
    def excess(d):
        # Signed distance of the tail probability from its target level.
        return prob(d, action, stats.binom(sample_size, theta0)) - tail_prob

    # Root-bracketing interval (1e-8, .5) is assumed to contain the crossing.
    return sp.optimize.bisect(excess, 1e-8, .5)
# Evaluate the Rademacher complexity (and the per-outcome empirical
# complexities) on a grid of sample sizes, then cache the results on disk.
samples = np.arange(10, 1000, 50)
RC_ = np.zeros(len(samples))
ERC = []
for i, n in enumerate(samples):
    complexity, per_outcome = rademacher_complexity(n)
    RC_[i] = complexity
    ERC.append(per_outcome)
data, data2 = pd.DataFrame(data=RC_), pd.DataFrame(data=ERC)
data.to_pickle("./rademacher.pickle")
data2.to_pickle("./empirical_rademacher.pickle")
In [7]:
samples = np.arange(10, 1000, 50)
# Load the cached complexities written by the previous cell.
RC = np.asarray(pd.read_pickle("./rademacher.pickle")).squeeze()
emp_rad = np.asarray(pd.read_pickle("./empirical_rademacher.pickle"))
colors = sns.color_palette("Blues")

fig, ax = plt.subplots(1, 2, figsize = (12, 4))
# Left panel: empirical Rademacher complexity along three simulated
# coin-toss sample paths.
for j in range(3):
    z_n = np.asarray([sp.stats.binom(1, theta0).rvs() for i in range(1000)])
    cumsum_z_n = np.cumsum(z_n)   # running count of successes
    er = []
    for i, n in enumerate(samples):
        # Empirical complexity for sample size n with cumsum_z_n[n] successes.
        # NOTE(review): cumsum_z_n[n] counts successes among the first n+1
        # tosses, not the first n -- confirm whether intended.
        er.append(emp_rad[i, cumsum_z_n[n]])
    ax[0].plot(samples, er, color = colors[2+j])
# FIX: the original titles used an escaped backslash followed by a line
# break inside a string literal (a syntax error from a mangled line
# continuation); rebuilt them from adjacent string literals.
ax[0].set_title("Empirical Rademacher complexity of $\\mathcal{L}_{\\mathcal{A}}(z^n)$"
                "\nfor alternative samples", fontsize = 14)
ax[0].set_xlabel('Sample size', fontsize = 12)
ax[0].set_ylim([0, .25])

# Right panel: the (true) Rademacher complexity as a function of n.
ax[1].plot(samples, RC, lw = 3)
ax[1].set_title("Rademacher complexity of $\\mathcal{L}_{\\mathcal{A}}$"
                "\nas a function of sample size", fontsize = 14)
ax[1].set_xlabel('Sample size', fontsize = 12)
ax[1].set_ylim([0, .25])

plt.tight_layout()
plt.savefig('asymptotic_rademacher_cointoss.png', format = 'png', dpi = 800, bbox_inches='tight')
plt.show()
Compare tail bounds for different actions with the Rademacher bound
In [10]:
# Inputs for comparing the uniform Rademacher bound with pointwise bounds.
varepsilon = .05   # target tail probability for every bound below
aa = .99           # fixed action used when varying the true theta
sample_size = np.arange(5, 1000, 10)
actions = [.4, .9, .95, 1]   # actions for the action-specific bounds
truths = [.01, .6, .95]      # alternative true thetas for the right panel
store_tailbound = np.zeros((len(sample_size), len(actions)))
store_truths = np.zeros((len(sample_size), len(truths)))
# delta_n(a): deviation level whose tail probability equals varepsilon
# at action a under the true theta0.
for i, n in enumerate(sample_size):
    for j, a in enumerate(actions):
        store_tailbound[i, j] = tail_figure(n, theta0, a, varepsilon)
# Same inversion, but varying the true parameter at the fixed action aa.
for i, n in enumerate(sample_size):
    for j, t in enumerate(truths):
        store_truths[i, j] = tail_figure(n, t, aa, varepsilon)
# Uniform bound built from the Rademacher complexity RC on the grid
# `samples` computed in the earlier cells.
RC_bound = 2*RC + np.sqrt(np.log(2/varepsilon)/(2*samples))
# NOTE(review): `er` here is leftover state from the previous plotting
# cell (the last of the three simulated paths), so this line is
# execution-order dependent. ERC_bound is computed but never used by the
# figure cell below -- confirm whether it should be plotted or removed.
ERC_bound = 2*np.asarray(er) + np.sqrt(4*np.log(2/varepsilon)/(samples))
In [11]:
# Compare the uniform Rademacher bound against the pointwise bounds
# computed in the previous cell.
colors = sns.color_palette("Blues")
colors_g = sns.color_palette("BuGn_r")
fig, ax = plt.subplots(1, 2, figsize = (12, 4), sharey = True)

# Left panel: uniform bound vs action-specific bounds under the true model.
left_title = ('Uniform and action specific bounds using the true model\n' +
              r"$\varepsilon = {:1.2f}$, $\theta_0 = {:1.2f}$".format(varepsilon, theta0))
ax[0].set_title(left_title, fontsize = 13)
ax[0].plot(samples, RC_bound, lw = 3, color = sns.color_palette()[2], label = 'Uniform bound')
for idx, action in enumerate(actions):
    ax[0].plot(sample_size, store_tailbound[:, idx],
               label = r'$\delta_n$' + '({:1.2f})'.format(action), color = colors[2+idx])
ax[0].legend(loc = 'best', fontsize = 12)
ax[0].set_xlabel('Sample size', fontsize = 12)
ax[0].set_ylabel('Absolute difference', fontsize = 12)
ax[0].set_ylim([0, .5])

# Right panel: uniform bound vs bounds under alternative true parameters.
right_title = (r'Uniform and distribution specific bounds using $\mathsf{R}\left(\mathcal{L}_{\mathcal{A}}(z^n)\right)$' + '\n' +
               r"$\varepsilon = {:1.2f}$, $a = {:1.2f}$".format(varepsilon, aa))
ax[1].set_title(right_title, fontsize = 13)
ax[1].plot(samples, RC_bound, lw = 3, color = sns.color_palette()[2], label = 'Uniform bound')
for idx, truth in enumerate(truths):
    ax[1].plot(sample_size, store_truths[:, idx],
               label = r'$\theta=$' + '{:1.2f}'.format(truth), color = colors_g[idx])
ax[1].legend(loc = 'best', fontsize = 12)
ax[1].set_xlabel('Sample size', fontsize = 12)

plt.tight_layout()
plt.savefig('rademacher_tail_bound.png', format = 'png', dpi=800 ,bbox_inches='tight')
Rademacher -- sinusoid classification
In [11]:
np.random.seed(12)
aux = np.random.rand(100)
def rademacher(n):
'''Generates N Rademcaher random variables.'''
return np.array([1 if x >= .5 else -1 for x in aux[:n]])
def rad_figure(n):
    """
    Build the sinusoid-classifier illustration for a sample of size n.

    Draws Rademacher labels at the dyadic points x_i = 2^{-(i+1)} and
    computes the frequency `a` of the sinusoid sin(a*x) that fits them.

    Returns the 5-tuple (a, x, y, R, a) -- the frequency appears both
    first and last to match the existing call sites -- where R is the
    empirical correlation between the classifier's signs and the labels.
    """
    size = int(n)
    # Dyadic covariates 2^{-(i+1)}.
    x = np.array([2**(-(i+1)) for i in range(n)])
    y = rademacher(size)
    # 0/1 re-encoding of the labels (+1 -> 1, otherwise 0).
    bits = np.array([1 if label > 0 else 0 for label in y])
    # Matching scales 2^{i+1} for the frequency formula.
    scales = np.array([2**((i+1)) for i in range(size)])
    a = np.pi*((1-bits) @ scales + 1)
    # Classifier signs at the sample points.
    c = np.array([1 if np.sin(a*xi) > 0 else -1 for xi in x])
    R = c @ y / size
    return a, x, y, R, a
# Two sample sizes for the side-by-side sinusoid illustration.
n1, n2 = 4, 6
# rad_figure returns (a, x, y, R, a); the duplicated frequency lands in
# a1/a2 twice, so the final unpacking target wins.
a1, x1, y1, R1, a1 = rad_figure(n1)
a2, x2, y2, R2, a2 = rad_figure(n2)
x_axis = np.linspace(0, .5, 1000)
colors = sns.color_palette()
fig, ax = plt.subplots(1, 2, figsize = (12, 4), sharey = True)
fig.suptitle(r'Empirical Rademacher complexity of $\mathcal{L}_{\mathcal{A}}(z^n)$',
             fontsize=14, y=1.04)
# Left panel: classifier sin(a1 * x) against the size-n1 sample.
sin1 = np.sin(a1*x_axis)
ax[0].plot(x_axis, sin1, color = colors[3], label = 'Classifier')
ax[0].fill_between(x_axis, 0, sin1, color = colors[3], alpha = .5)
ax[0].scatter(x1, y1, color = colors[2], label = 'Sample')
ax[0].set_xlim([-.05, .55])
ax[0].set_ylim([-1.55, 1.5])
ax[0].set_xlabel(r"Covariate, $X$")
ax[0].set_ylabel(r"$Y$", rotation = 0)
# NOTE: .format({int(n1)}) formats a one-element SET, whose repr '{4}'
# supplies the literal braces the LaTeX superscript needs -- presumably
# intentional; do not "fix" to a plain int without checking the render.
ax[0].text(.4, 1.2, r'$\mathsf{R}(\mathcal{L}_{\mathcal{A}}(' +
           r'z^{})) = $'.format({int(n1)}) + ' {:.2f}'.format(R1), fontsize=12)
ax[0].text(.4, -1.25, r'optimal $a = $' + ' {:.1f}'.format(a1), fontsize=12)
# Same set-repr trick: the title renders via usetex, so '{4}' shows as '4'.
ax[0].set_title(r'Sample size = {}'.format({int(n1)}), fontsize=12)
ax[0].axhline(0, color = 'k', alpha=.5)
ax[0].legend(loc=3)
# Right panel: classifier sin(a2 * x) against the size-n2 sample.
sin2 = np.sin(a2*x_axis)
ax[1].plot(x_axis, sin2, color = colors[3], label = 'Classifier')
ax[1].fill_between(x_axis, 0, sin2, color = colors[3], alpha = .5)
ax[1].scatter(x2, y2, color = colors[2], label = 'Sample')
ax[1].set_xlim([-.05, 0.55])
ax[1].set_ylim([-1.55, 1.5])
ax[1].set_xlabel(r"Covariate, $X$")
ax[1].text(.4, 1.2, r'$\mathsf{R}(\mathcal{L}_{\mathcal{A}}(' +
           r'z^{})) = $'.format({int(n2)}) + ' {:.2f}'.format(R2), fontsize=12)
ax[1].text(.4, -1.25, r'optimal $a = $' + ' {:.1f}'.format(a2), fontsize=12)
ax[1].set_title(r'Sample size = {}'.format({int(n2)}), fontsize=12)
ax[1].axhline(0, color = 'k', alpha=.5)
ax[1].legend(loc=3)
plt.tight_layout()
plt.savefig('asymptotic_rademacher_sinusoid.png', format = 'png', dpi=800 ,bbox_inches='tight')