In [1]:
import numpy as np
np.random.seed(0)
from selection.SPRT import SPRT
%pylab inline
In [2]:
sequential_test = SPRT(lambda n: 4 * np.sqrt(n), nmin=3, nmax=5)
If we feed this constraint zero and measurement $\sigma$ 1.5 (so that the standard deviation of one difference is $\sqrt{2} \cdot 1.5$). We expect the constraints to be only on the third and fourth running sum. The third running sum should be between $\pm 6 \sqrt{6}$ while the fourth should be between $\pm 12 \sqrt{2}$.
In [3]:
sigma = 1.5
zero = np.zeros((sequential_test.nmax, 2))
zero_results = sequential_test(zero, sigma=sigma)
zero_results.outcome
Out[3]:
In [4]:
zero_results.selection_constraints[0].inequality
Out[4]:
In [5]:
print zero_results.selection_constraints[0].inequality_offset
print (6*np.sqrt(6), 12*np.sqrt(2))
If we instead give it data with a strong signal, we expect it will terminate very early. The boundaries are unchanged so we should be able to make it stop at either 3 steps or 4 steps.
In [6]:
upper = np.zeros((sequential_test.nmax, 2))
upper[:,1] = np.array([2,4,10,0,0])
upper_results = sequential_test(upper, sigma=sigma)
(upper_results.outcome, upper_results.stopping_time)
Out[6]:
In [7]:
(upper_results.selection_constraints[0].inequality,
upper_results.selection_constraints[0].inequality_offset)
Out[7]:
In [8]:
later = np.zeros((sequential_test.nmax, 2))
later[:,1] = -np.array([2,4,4,7,0])
later_results = sequential_test(later, sigma=sigma)
(later_results.outcome, later_results.stopping_time)
Out[8]:
In [9]:
(later_results.selection_constraints[0].inequality,
later_results.selection_constraints[0].inequality_offset)
Out[9]:
The returned object has a truncated Gaussian associated to it.
In [10]:
quantile = later_results.trunc_gauss.quantile(0.05)
later_results.trunc_gauss.CDF(quantile)
Out[10]:
It also has a selection interval attached to it, computed using Will's UMAU calculations.
In [11]:
later_results.naive_interval, later_results.UMAU_interval
Out[11]:
In [12]:
sequential_test = SPRT(lambda n: 4 * np.sqrt(n), nmin=3, nmax=20)
mu, sigma = np.array([4,6]), 1
simulate = lambda mu: np.random.standard_normal((sequential_test.nmax,2)) * sigma + mu[None,:]
simulate(mu)
Out[12]:
In [13]:
Z = simulate(mu)
results = sequential_test(Z, sigma=sigma)
U = results.UMAU_interval
N = results.naive_interval
results.trunc_gauss.mu = N[0]
results.trunc_gauss.CDF(results.observed)
Out[13]:
In [14]:
results.trunc_gauss.mu = N[1]
results.trunc_gauss.CDF(results.observed)
Out[14]:
In [15]:
results.naive_interval, results.UMAU_interval
Out[15]:
In [16]:
results.outcome
Out[16]:
In [17]:
results.boundary
Out[17]:
In [18]:
quantile = results.trunc_gauss.quantile(0.05)
results.trunc_gauss.CDF(quantile)
Out[18]:
In [19]:
P = []
pivot = []
nsim = 500
naive = plt.figure(figsize=(10,8)).gca()
for i in range(nsim):
Z = simulate(mu)
results = sequential_test(Z, sigma=sigma)
results.alpha = 0.1
P.append(results.pvalue(truth=0))
pivot.append(results.pvalue(truth=mu[1]-mu[0]))
if i < 100:
if (results.naive_interval[0] < mu[1] - mu[0] and
results.naive_interval[1] > mu[1] - mu[0]):
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='green')
else:
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='red')
naive.plot([0,nsim], [mu[1]-mu[0],mu[1]-mu[0]], 'k--')
naive.plot([0,nsim], [0,0], 'gray')
naive.set_xlim([0,100])
Out[19]:
In [20]:
%load_ext rmagic
In [21]:
%%R -i P,pivot
plot(ecdf(P), col='green')
lines(ecdf(pivot), col='red')
ecdf(P)(0.05)
In [22]:
P = []
pivot = []
nsim = 500
naive = plt.figure(figsize=(10,8)).gca()
nominal = plt.figure(figsize=(10,8)).gca()
for i in range(nsim):
Z = simulate(mu)
results = sequential_test(Z, sigma=sigma, extra_samples=5)
results.alpha = 0.1
P.append(results.pvalue(truth=0))
pivot.append(results.pvalue(truth=mu[1]-mu[0]))
if i < 100:
if (results.naive_interval[0] < mu[1] - mu[0] and
results.naive_interval[1] > mu[1] - mu[0]):
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='green')
else:
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='red')
if (results.nominal_interval[0] < mu[1] - mu[0] and
results.nominal_interval[1] > mu[1] - mu[0]):
nominal.plot([i,i], results.nominal_interval, linewidth=3, alpha=0.5, color='green')
else:
nominal.plot([i,i], results.nominal_interval, linewidth=3, alpha=0.5, color='red')
naive.plot([0,nsim], [mu[1]-mu[0],mu[1]-mu[0]], 'k--')
naive.plot([0,nsim], [0,0], 'gray')
naive.set_xlim([0,100])
naive.title('Adjusted intervals')
nominal.plot([0,nsim], [mu[1]-mu[0],mu[1]-mu[0]], 'k--')
nominal.plot([0,nsim], [0,0], 'gray')
nominal.set_xlim([0,100])
nominal.title('Nominal intervals for same data')
In [ ]:
%%R -i P,pivot
plot(ecdf(P), col='green')
lines(ecdf(pivot), col='red')
ecdf(P)(0.05)
In [ ]:
sequential_test = SPRT(lambda n: 2 * np.sqrt(n), nmin=3, nmax=20)
mu, sigma = np.array([4,6]), 1. / np.sqrt(2)
simulate = lambda mu: np.random.standard_normal((sequential_test.nmax,2)) * sigma + mu[None,:]
In [54]:
P = []
pivot = []
nsim = 500
naive = plt.figure(figsize=(10,8)).gca()
for i in range(nsim):
Z = simulate(mu)
results = sequential_test(Z, sigma=sigma)
results.alpha = 0.1
P.append(results.pvalue(truth=0))
pivot.append(results.pvalue(truth=mu[1]-mu[0]))
if i < 100:
if (results.naive_interval[0] < mu[1] - mu[0] and
results.naive_interval[1] > mu[1] - mu[0]):
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='green')
else:
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='red')
naive.plot([0,nsim], [mu[1]-mu[0],mu[1]-mu[0]], 'k--')
naive.plot([0,nsim], [0,0], 'gray')
naive.set_xlim([0,100])
In [55]:
%%R -i P,pivot
plot(ecdf(P), col='green')
lines(ecdf(pivot), col='red')
ecdf(P)(0.05)
In short, with a wider boundary, the intervals seem smaller but our conditional test seems to have less power. Let's also take a few more samples.
In [56]:
P = []
pivot = []
nsim = 500
naive = plt.figure(figsize=(10,8)).gca()
for i in range(nsim):
Z = simulate(mu)
results = sequential_test(Z, sigma=sigma, extra_samples=5)
results.alpha = 0.1
P.append(results.pvalue(truth=0))
pivot.append(results.pvalue(truth=mu[1]-mu[0]))
if i < 100:
if (results.naive_interval[0] < mu[1] - mu[0] and
results.naive_interval[1] > mu[1] - mu[0]):
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='green')
else:
naive.plot([i,i], results.naive_interval, linewidth=3, alpha=0.5, color='red')
naive.plot([0,nsim], [mu[1]-mu[0],mu[1]-mu[0]], 'k--')
naive.plot([0,nsim], [0,0], 'gray')
naive.set_xlim([0,100])
Out[56]:
In [57]:
%%R -i P,pivot
plot(ecdf(P), col='green')
lines(ecdf(pivot), col='red')
ecdf(P)(0.05)
In [28]: