In [1]:
%load_ext rmagic
%pylab inline
import numpy as np
from selection.isotonic import isotonic
import selection.constraints as C
C.WARNINGS = False # suppress warnings from the constraints module
We will solve the problem $$ \hat{\mu}_{+} = \text{argmin}_{\mu \in \mathbb{R}^n: \mu_{i+1} \geq \mu_i, 1 \leq i \leq n-1} \frac{1}{2} \|y-\mu\|^2_2 = \text{argmin}_{\mu \in \mathbb{R}^n: \mu_{i+1} \geq \mu_i, 1 \leq i \leq n-1} \frac{1}{2} \left[ \|P(y-\mu)\|^2_2 + \|P^{\perp}(y-\mu)\|^2_2 \right] $$ with $$ P = I - \frac{1}{n} 11^T. $$
There are, of course, many ways to solve this problem, but we reparameterize $$ \beta = D\mu, \alpha = \bar{\mu} $$ where $D[i,] = e_{i+1}-e_i, 1 \leq i \leq n-1$ is the matrix of first differences.
We see that in $\beta$, the problem is $$ \hat{\beta}_+ = \text{argmin}_{\beta \in \mathbb{R}^{n-1}: \beta_i \geq 0} \frac{1}{2} \|P(y- D^{\dagger}\beta)\|^2_2 $$ and $$ \hat{\alpha} = \bar{y}. $$ For ease of notation, we will set $X=PD^{\dagger}$ throughout.
Having found $\hat{\beta}_+$ we can find $\hat{\mu}_+$ by $$ \hat{\mu}_+ = \bar{y} \cdot 1 + X\hat{\beta}_+ $$
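To make the reparameterization concrete, here is a minimal sketch that builds $D$ and $X = PD^{\dagger}$ explicitly, solves the $\beta$ problem with scipy.optimize.nnls (rather than the isotonic class used below), and reconstructs $\hat{\mu}_+$:
In [ ]:
from scipy.optimize import nnls
n = 10
y = np.random.standard_normal(n)
D = np.zeros((n-1, n))                      # first differences: D[i] = e_{i+1} - e_i
D[np.arange(n-1), np.arange(n-1)] = -1
D[np.arange(n-1), np.arange(1, n)] = 1
P = np.identity(n) - np.ones((n, n)) / n    # centering projection
X = np.dot(P, np.linalg.pinv(D))
beta_hat, _ = nnls(X, np.dot(P, y))         # nonnegative least squares gives beta_hat_+
mu_hat = y.mean() + np.dot(X, beta_hat)     # mu_hat_+ = ybar * 1 + X beta_hat_+
print(np.all(np.diff(mu_hat) >= -1e-10))    # the reconstructed fit is non-decreasing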
The KKT conditions for the $\beta$ problem are defined in terms of a quantity analogous to the equicorrelation set: $$ {\cal E} = {\cal E}(y) = \left\{i: X_i^T(y-X\hat{\beta}_+(y))=0 \right\} $$ and the blocks of the KKT conditions are $$ \begin{aligned} X_{\cal E}^T(y-X\hat{\beta}_+) &= 0 \\ X_{-\cal E}^T(y-X\hat{\beta}_+) &<0 \\ X_{\cal E}^{\dagger}y & > 0 \end{aligned} $$
As in the LASSO case, these equations can be solved explicitly given the value of ${\cal E}$: $$ \begin{aligned} \hat{\beta}_{+,\cal E} &= X_{\cal E}^{\dagger}y \\ \hat{\beta}_{+,-{\cal E}} &= 0 \\ \max_{i \in {-\cal E}}X_i^T(I-P_{\cal E})y &< 0. \end{aligned} $$
This determines our $(A,b)$ pair: $$ \begin{aligned} A(y) &= -X_{-{\cal E}(y)}^T(I-P_{{\cal E}(y)}) \\ b(y) &= 0_{-{\cal E}(y)}. \end{aligned} $$
Let's check that our function works.
In [2]:
Y = np.random.standard_normal(10)
I = isotonic(Y)
The attribute subgrad is a subgradient of the nonnegativity constraint at $\hat{\beta}_+$, so it should be elementwise nonpositive and exactly zero at indices $i$ where $\hat{\mu}[i+1]-\hat{\mu}[i]>0$.
In [3]:
print I.subgrad
In [4]:
print I.fitted[1:] - I.fitted[:-1]
It passes this check. As expected, the fitted values are non-decreasing.
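We can also check the monotonicity programmatically, allowing a little room for round-off:
In [ ]:
print(np.all(np.diff(I.fitted) >= -1e-10))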
Finally, we have chosen the constraint matrix so that np.dot(I.constraints.inequality, Y) is elementwise non-negative. Let's verify this:
In [5]:
print np.dot(I.constraints.inequality, Y)
As a first question, we might want to test whether $\mu_2=0$, say.
In [6]:
n = Y.shape[0]
eta = np.zeros(n)
eta[1] = 1
Vplus, eta_dot_Y, Vminus, sigma_eta = I.constraints.bounds(eta, Y)
print Vplus, eta_dot_Y, Vminus, sigma_eta
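These four numbers are the ingredients of the selective test: under the null $\eta^T\mu=0$, and conditional on the selection event, $\eta^TY$ is a centered Gaussian with standard deviation sigma_eta truncated to the interval determined by $\mathcal{V}^-$ and $\mathcal{V}^+$. Here is a minimal sketch of the resulting one-sided pivot; the pivot method used below presumably performs this computation, with more care about numerical precision in extreme tails:
In [ ]:
from scipy.stats import norm
def truncated_gaussian_pivot(lower, observed, upper, sd):
    # one-sided ("greater") p-value for a N(0, sd^2) variable
    # truncated to [lower, upper] by the selection event
    num = norm.sf(observed / sd) - norm.sf(upper / sd)
    den = norm.sf(lower / sd) - norm.sf(upper / sd)
    return num / den
print(truncated_gaussian_pivot(Vminus, eta_dot_Y, Vplus, sigma_eta))  # assuming Vminus <= eta_dot_Y <= Vplus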
In [7]:
%%R -o mu
mu = c(rep(0,20), rep(2.4,20), rep(4,10))
plot(isoreg(rnorm(50)*0.5+mu), xlab='t', ylab='Y')
lines(mu, type='s', lwd=3, col='blue')
In [8]:
def simulate_fixed(n, j, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    eta = np.zeros(n)
    eta[j:] = 1
    eta -= eta.mean()
    return I.constraints.pivot(eta, Y, 'greater')
This test uses the (usually unknown) position of the true jump in the signal. In our simulation, this jump is at $t=20$. We might also try an incorrect position, say $t=10$.
In [9]:
P0, Pa_correct, Pa_incorrect = [], [], []
n, j_correct, j_incorrect, nsim = 50, 20, 10, 1000
for _ in range(nsim):
    P0.append(simulate_fixed(n,j_correct))
    Pa_correct.append(simulate_fixed(n,j_correct,mu=mu))
    Pa_incorrect.append(simulate_fixed(n,j_incorrect,mu=mu))
In [10]:
%%R -i P0,Pa_correct,Pa_incorrect
plot(ecdf(P0), col='red', lwd=3)
plot(ecdf(Pa_correct), col='blue', add=TRUE, lwd=3)
plot(ecdf(Pa_incorrect), col='green', add=TRUE, lwd=3)
abline(0,1)
In [11]:
def simulate_random_constraint(n, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    # np.random.randint excludes `high`, so use the full row count of the constraint matrix
    rand_idx = np.random.randint(low=0, high=I.constraints.inequality.shape[0])
    eta = I.constraints.inequality[rand_idx]
    return I.constraints.pivot(eta, Y, 'greater')
In [12]:
P0, Pa = [], []
for _ in range(nsim):
    pval = simulate_random_constraint(n)
    P0.append(pval)
    pval = simulate_random_constraint(n, mu=mu)
    Pa.append(pval)
In [13]:
%%R -i P0,Pa
P0 = P0[P0>0]
P0 = P0[P0<1]
plot(ecdf(P0), col='red', lwd=3)
Pa = Pa[Pa>0]
Pa = Pa[Pa<1]
plot(ecdf(Pa), col='blue', lwd=3, add=TRUE)
abline(0,1)
This has almost no power to reject $H_0$, not surprisingly.
In [14]:
def simulate_first_jump(n, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    J = I.first_jump
    if J is not None:
        return J[-1]
    return J
In [15]:
P0, Pa = [], []
for _ in range(nsim):
    pval = simulate_first_jump(n)
    if pval is not None:
        P0.append(pval)
    pval = simulate_first_jump(n, mu=mu)
    if pval is not None:
        Pa.append(pval)
In [16]:
%%R -i P0,Pa
P0 = P0[P0>0]
P0 = P0[P0<1]
plot(ecdf(P0), col='red', lwd=3)
Pa = Pa[Pa>0]
Pa = Pa[Pa<1]
plot(ecdf(Pa), col='blue', lwd=3, add=TRUE)
abline(0,1)
This does not seem to have much power, which would be the case if the first detected jump almost never occurs at $t=20$.
In [17]:
def simulate_largest_jump(n, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    J = I.largest_jump_univariate
    if J is not None:
        return J[-1]
    return J
In [18]:
P0, Pa_large = [], []
for _ in range(nsim):
    pval = simulate_largest_jump(n)
    if pval is not None:
        P0.append(pval)
    pval = simulate_largest_jump(n, mu=mu)
    if pval is not None:
        Pa_large.append(pval)
In [19]:
%%R -i P0,Pa_large
P0 = P0[P0>0]
P0 = P0[P0<1]
plot(ecdf(P0), col='red', lwd=3)
Pa_large = Pa_large[Pa_large>0]
Pa_large = Pa_large[Pa_large<1]
plot(ecdf(Pa_large), col='green', lwd=3, add=TRUE)
plot(ecdf(Pa_correct), col='blue', lwd=3, add=TRUE)
abline(0,1)
Ideally, we might try pooling some or all of the jumps into some sort of $\chi^2$ test. This is indeed possible. Here, we combine the first $k$ jumps into a $\chi^2_k$ statistic, subject to the constraints imposed by the isotonic regression.
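For orientation, the unadjusted version of such a pooled test would simply project $Y$ onto the span of the $k$ contrast vectors and refer $\|P_{\text{span}}Y\|^2/\sigma^2$ to $\chi^2_k$; combine_jumps has to account for the isotonic constraints on top of this. A minimal sketch of the unadjusted statistic (the contrasts argument is a hypothetical stand-in, not part of the package's API):
In [ ]:
from scipy.stats import chi2
def naive_chi2_pvalue(Y, contrasts, sigma=0.5):
    # contrasts: k x n array whose (linearly independent) rows are the pooled contrast vectors
    E = np.atleast_2d(contrasts)
    G = np.dot(E, E.T)                                     # k x k Gram matrix
    proj = np.dot(E.T, np.linalg.solve(G, np.dot(E, Y)))   # projection of Y onto the row span of E
    stat = np.sum(proj**2) / sigma**2
    return chi2.sf(stat, df=E.shape[0])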
In [20]:
def simulate_combining_jumps(n, k=3, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    return I.combine_jumps(k)
simulate_combining_jumps(10, 3, 0)
Out[20]:
In [21]:
P0, Pa_comb = [], []
for _ in range(nsim):
    pval = simulate_combining_jumps(n, 5)
    if pval is not None:
        P0.append(pval)
    pval = simulate_combining_jumps(n, 5, mu=mu)
    if pval is not None:
        Pa_comb.append(pval)
In [22]:
%%R -i P0,Pa_comb
P0 = P0[P0>0]
P0 = P0[P0<1]
Pa_comb = Pa_comb[Pa_comb>=0]
Pa_comb = Pa_comb[Pa_comb<1]
plot(ecdf(P0), col='red', lwd=3)
plot(ecdf(Pa_comb), col='purple', lwd=3, add=TRUE)
plot(ecdf(Pa_correct), col='blue', lwd=3, add=TRUE)
plot(ecdf(Pa_large), col='green', lwd=3, add=TRUE)
abline(0,1)
Our last test sums all of the jumps to form an $\eta$. This has pretty good power.
In [23]:
def simulate_summing_jumps(n, k=3, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    return I.sum_jumps(k)
simulate_summing_jumps(20, 3, 0)
Out[23]:
In [24]:
P0, Pa_sum = [], []
for _ in range(nsim):
    pval = simulate_summing_jumps(n, n)
    if pval is not None:
        P0.append(pval[-1])
    pval = simulate_summing_jumps(n, n, mu=mu)
    if pval is not None:
        Pa_sum.append(pval[-1])
In [37]:
%%R -i P0,Pa_sum
P0 = P0[P0>0]
P0 = P0[P0<1]
Pa_sum = Pa_sum[Pa_sum>=0]
Pa_sum = Pa_sum[Pa_sum<1]
plot(ecdf(P0), col='red', lwd=3)
Pa_comb = Pa_comb[Pa_comb>0]
Pa_comb = Pa_comb[Pa_comb<1]
plot(ecdf(Pa_comb), col='purple', lwd=3, add=TRUE)
plot(ecdf(Pa_correct), col='blue', lwd=3, add=TRUE)
plot(ecdf(Pa_large), col='orange', lwd=3, add=TRUE)
plot(ecdf(Pa_sum), col='green', lwd=3, add=TRUE)
abline(0,1)
In [26]:
def coverage_largest_jump(n, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    value = I.largest_jump_univariate_interval
    if value is not None:
        eta, interval = value
        truth = np.dot(eta, mu)
        estimate = np.dot(eta, Y)
        lower, upper = interval
        if lower < truth and truth < upper:
            covered = True
        else:
            covered = False
        return truth, estimate, lower, upper, covered
In [27]:
results = []
for _ in range(100):
    value = coverage_largest_jump(n, mu=mu)
    if value:
        results.append(value)
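Before plotting, a quick summary: the last entry of each result tuple records whether the interval covered the truth, so averaging it gives the empirical coverage.
In [ ]:
print(np.mean([result[-1] for result in results]))  # empirical coverage over the simulations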
In [28]:
f = plt.figure(figsize=(12,7))
j = 0
for i in range(len(results)):   # results may have fewer than 100 entries
    truth, estimate, lower, upper, covered = results[i]
    # get rid of some failures:
    if max(np.fabs(lower), np.fabs(upper)) < 1e4 and np.fabs(estimate) > 1.e-4:
        if not covered:
            plt.plot([j,j], [lower, upper], linewidth=4, color='red', alpha=0.7)
        else:
            plt.plot([j,j], [lower, upper], linewidth=4, color='green', alpha=0.7)
        plt.scatter(j, truth, c='k', s=30)   # plot the truth over its interval
        j += 1
plt.gca().set_ylim([-20,10])
plt.gca().set_xlim([-1,j+1])
Out[28]:
In [29]:
results = []
for _ in range(100):
    value = coverage_largest_jump(n, mu=5*mu)
    if value:
        results.append(value)
In [30]:
f = plt.figure(figsize=(12,7))
j = 0
for i in range(len(results)):   # results may have fewer than 100 entries
    truth, estimate, lower, upper, covered = results[i]
    # get rid of some failures:
    if max(np.fabs(lower), np.fabs(upper)) < 1e4 and np.fabs(estimate) > 1.e-4:
        if not covered:
            plt.plot([j,j], [lower, upper], linewidth=4, color='red', alpha=0.7)
        else:
            plt.plot([j,j], [lower, upper], linewidth=4, color='green', alpha=0.7)
        plt.scatter(j, truth, c='k', s=30)   # plot the truth over its interval
        j += 1
plt.gca().set_ylim([-60,-40])
#plt.gca().set_xlim([-1,j+1])
Out[30]:
We can also look at the sum of the largest $k$ jumps:
In [31]:
def coverage_sum_jumps(n, k=3, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    value = I.sum_jumps_interval(k)
    if value:
        eta, interval = value
        truth = np.dot(eta, mu)
        estimate = np.dot(eta, Y)
        lower, upper = interval
        if lower < truth and truth < upper:
            covered = True
        else:
            covered = False
        return truth, estimate, lower, upper, covered
In [32]:
results = []
for _ in range(100):
    value = coverage_sum_jumps(n, n, mu=mu)
    if value:
        results.append(value)
In [33]:
f = plt.figure(figsize=(12,7))
for i in range(len(results)):   # results may have fewer than 100 entries
    truth, estimate, lower, upper, covered = results[i]
    if not covered:
        plt.plot([i,i], [lower, upper], linewidth=4, color='red', alpha=0.7)
    else:
        plt.plot([i,i], [lower, upper], linewidth=4, color='green', alpha=0.7)
    plt.scatter(i, truth, c='k', s=30)
plt.gca().set_ylim([-5,15])
Out[33]:
Or even an interval at a fixed point $t$.
In [34]:
def interval_fixed(n, j, mu=0):
    Y = np.random.standard_normal(n)*0.5 + mu
    I = isotonic(Y, sigma=0.5)
    eta = np.zeros(n)
    eta[j] = 1
    lower, upper = C.selection_interval(I.constraints.inequality,
                                        np.zeros(I.constraints.inequality.shape[0]),
                                        0.5**2 * np.identity(n),
                                        Y,
                                        eta, dps=17,
                                        upper_target=0.95,
                                        lower_target=0.05)
    truth = np.dot(eta, mu)
    estimate = np.dot(eta, Y)
    if lower < truth and truth < upper:
        covered = True
    else:
        covered = False
    return truth, estimate, lower, upper, covered
In [35]:
results = []
for _ in range(100):
    value = interval_fixed(n, 30, mu=mu)
    if value:
        results.append(value)
In [36]:
f = plt.figure(figsize=(12,7))
for i in range(len(results)):
    truth, estimate, lower, upper, covered = results[i]
    if not covered:
        plt.plot([i,i], [lower, upper], linewidth=4, color='red', alpha=0.7)
    else:
        plt.plot([i,i], [lower, upper], linewidth=4, color='green', alpha=0.7)
    plt.scatter(i, truth, c='k', s=30)
plt.gca().set_ylim([-5,15])
Out[36]: