In [ ]:
%load_ext rmagic
from ipy_table import make_table
import numpy as np

In [ ]:
%%R
strongstop <- function(p,alpha) {
   m <- length(p)
   sums <- rev(cumsum(rev(log(p)/(1:m))))
   max(which(sums < log(alpha * (1:m) / m)))
}
strongstop(c(rep(1e-10,10),runif(3000)),.05)

In [ ]:
%%R
forwardstop <- function(p, alpha) {
   m <- length(p)
   sums <- -(1/(1:m))*cumsum(log(1-p))
   max(which(sums < alpha))
}

In [ ]:
def strong_stop(p, alpha=0.3):
    %R -i p,alpha
    V = %R strongstop(p, alpha)
    return V[0]

def forward_stop(p, alpha=0.3):
    %R -i p,alpha
    V = %R forwardstop(p, alpha)
    return V[0]

In [ ]:
def when_null(hyps, sparsity=0):
    _when = []
    h_cumsum = np.cumsum(hyps, 1) 
    for i in range(hyps.shape[0]):
        try:
            idx = min(np.nonzero(h_cumsum[i] >= sparsity)[0]) + 1
        except ValueError:
            idx = hyps.shape[1] + 1
        _when.append(idx) 
    return np.array(_when) 

def summary(pvals, hyps, sparsity=0, rule='forward'):
    # stop is number selected
    stoprule = {'strong':strong_stop, 'forward':forward_stop}[rule]
    stop = np.array([max(stoprule(p, alpha=0.1),0) for p in pvals])
    fwer = np.mean([s > sparsity for s in stop])
    discoveries = np.array([hyp[:s].sum() for s,hyp in zip(stop, hyps)])
    varFDP = (stop - discoveries*1.) / (stop + (stop == 0))
    first_null = when_null(hyps, sparsity=sparsity)
    modelFDP = ((stop - sparsity) / (stop * 1. + (stop == 0))) * (stop >= sparsity)
    return np.mean(stop), np.mean(discoveries), np.mean(varFDP), np.mean(first_null), np.mean(fwer), np.mean(modelFDP)

In [ ]:
strong_stop(np.random.sample(1000))

Data-generating mechanism

  • $ n, p, \sigma = (50,100,1.5)$
  • $\tilde{X}[i] \sim N(0, I_{p \times p} + 0.49 \cdot 11^T), 1 \leq i \leq n$
  • $X = (I_{n \times n} - 1/n \cdot 11^T)\tilde{X}$.
  • for given nnz (number of non-zero) we set

        beta[:nnz] = np.linspace(4,4.5,nnz) 
  • $Y|X \sim N(X\beta, \sigma^2 I)$

So, our non-zero coefficients look like


In [ ]:
[(s, np.linspace(4,4.5,s)) for s in [0,1,2,5]]

Null model (0-sparse)


In [ ]:
sparsity = 0
reducedK = np.load('reduced_split_known%d.npy' % sparsity)
reducedU = np.load('reduced_split_unknown%d.npy' % sparsity)
reducedKF = np.load('reduced_splitfull_known%d.npy' % sparsity)
reducedUF = np.load('reduced_splitfull_unknown%d.npy' % sparsity)
covtest = np.load('covtest_split%d.npy' % sparsity)
spacings = np.load('spacings_split%d.npy' % sparsity)
sample_split = np.load('split%d.npy' % sparsity)
hypotheses = np.load('hypotheses_split_%d.npy' % sparsity)
hypotheses_full = np.load('hypotheses_splitfull_%d.npy' % sparsity)
first_null = when_null(hypotheses, sparsity=sparsity)
first_null_full = when_null(hypotheses_full, sparsity=sparsity)

In [ ]:
%%R -i reducedU,reducedK,covtest,spacings,sample_split,reducedKF,reducedUF -h 800 -w 800

par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   #plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   #plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
   #plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)
}

In [ ]:
%%R
pdf('null.pdf')
par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   #plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   #plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
   #plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)
}
dev.off()

1-sparse


In [ ]:
sparsity = 1
reducedK = np.load('reduced_split_known%d.npy' % sparsity)
reducedU = np.load('reduced_split_unknown%d.npy' % sparsity)
reducedKF = np.load('reduced_splitfull_known%d.npy' % sparsity)
reducedUF = np.load('reduced_splitfull_unknown%d.npy' % sparsity)
covtest = np.load('covtest_split%d.npy' % sparsity)
spacings = np.load('spacings_split%d.npy' % sparsity)
sample_split = np.load('split%d.npy' % sparsity)
hypotheses = np.load('hypotheses_split_%d.npy' % sparsity)
hypotheses_full = np.load('hypotheses_splitfull_%d.npy' % sparsity)
first_null = when_null(hypotheses, sparsity=sparsity)
first_null_full = when_null(hypotheses_full, sparsity=sparsity)

Strong stop


In [ ]:
make_table([['Method', 'Steps taken', 'True variables discovered', 'varFDP', 'Steps to find model', 'FWER', 'modelFDP']]
            + [(name,) + summary(data, hyps, sparsity=sparsity, rule='strong') for name, data, hyps in 
               zip(['Sample splitting', 'Reduced full data, unknown', 'Reduced full data, known', 'Full model, known'],
                   [sample_split, reducedUF, reducedKF, spacings],
                   [hypotheses, hypotheses_full, hypotheses_full, hypotheses_full])])

Forward stop


In [ ]:
make_table([['Method', 'Steps taken', 'True variables discovered', 'varFDP', 'Steps to find model', 'FWER', 'modelFDP']]
            + [(name,) + summary(data, hyps, sparsity=sparsity, rule='forward') for name, data, hyps in 
               zip(['Sample splitting', 'Reduced full data, unknown', 'Reduced full data, known', 'Full model, known'],
                   [sample_split, reducedUF, reducedKF, spacings],
                   [hypotheses, hypotheses_full, hypotheses_full, hypotheses_full])])

Mixture


In [ ]:
%%R -i reducedU,reducedK,covtest,spacings,sample_split,reducedKF,reducedUF,first_null,first_null_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)   
   plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
}

Null p-values


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i >= first_null+1)
   if (sum(null_Ps) > 0) {
      plot(ecdf(spacings[null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i >= first_null_full+1)
   if (sum(null_Ps) > 0) {
       plot(ecdf(reducedKF[null_Ps,i]), col='cyan', add=TRUE)
       plot(ecdf(reducedUF[null_Ps,i]), col='green', add=TRUE)
   }
}

Non-null p-values


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i >= first_null+1)
   any_null=FALSE
    if (sum(!null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[!null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[!null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[!null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[!null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[!null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i >= first_null_full+1)
   if (sum(!null_Ps) > 0) {
       plot(ecdf(reducedKF[!null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[!null_Ps,i]), col='green', add=TRUE)
    }
}

Last step


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i == first_null)
   any_null=FALSE
    if (sum(null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i == first_null_full)
   if (sum(null_Ps) > 0) {
       plot(ecdf(reducedKF[null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[null_Ps,i]), col='green', add=TRUE)
    }
}

Discoveries (when we had a true variable added to some model)


In [ ]:
%%R  -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   alt_Ps = (hypotheses[,i] == 1)
   if (sum(alt_Ps) > 0) {
      plot(ecdf(spacings[alt_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[alt_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[alt_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[alt_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[alt_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   alt_Ps = (hypotheses_full[,i] == 1)
   if (sum(alt_Ps) > 0) {
       plot(ecdf(reducedKF[alt_Ps,i]), col='cyan', add=TRUE)
       plot(ecdf(reducedUF[alt_Ps,i]), col='green', add=TRUE)
   }
}

2-sparse


In [ ]:
sparsity = 2
reducedK = np.load('reduced_split_known%d.npy' % sparsity)
reducedU = np.load('reduced_split_unknown%d.npy' % sparsity)
reducedKF = np.load('reduced_splitfull_known%d.npy' % sparsity)
reducedUF = np.load('reduced_splitfull_unknown%d.npy' % sparsity)
covtest = np.load('covtest_split%d.npy' % sparsity)
spacings = np.load('spacings_split%d.npy' % sparsity)
sample_split = np.load('split%d.npy' % sparsity)
hypotheses = np.load('hypotheses_split_%d.npy' % sparsity)
hypotheses_full = np.load('hypotheses_splitfull_%d.npy' % sparsity)
first_null = when_null(hypotheses, sparsity=sparsity)
first_null_full = when_null(hypotheses_full, sparsity=sparsity)

Strong stop


In [ ]:
table = ([['Method', 'Steps taken', 'True variables discovered', 'varFDP', 'Steps to find model', 'FWER', 'modelFDP']]
        + [(name,) + summary(data, hyps, sparsity=sparsity, rule='strong') for name, data, hyps in 
                   zip(['Sample splitting', 'Reduced full data, unknown', 'Reduced full data, known', 'Full model, known'],
                   [sample_split, reducedUF, reducedKF, spacings],
                   [hypotheses, hypotheses_full, hypotheses_full, hypotheses_full])])

In [ ]:
make_table(table)
tex_table = r'''
\begin{tabular}[ccccccc] \hline
%s
\end{tabular}
''' % '\n'.join([(' & '.join([str(s) for s in row]) + r'\\ \hline') for row in table])
print tex_table

Forward stop


In [ ]:
table =([['Method', 'Steps taken', 'True variables discovered', 'varFDP', 'Steps to find model', 'FWER', 'modelFDP']]
            + [(name,) + summary(data, hyps, sparsity=sparsity, rule='forward') for name, data, hyps in 
               zip(['Sample splitting', 'Reduced full data, unknown', 'Reduced full data, known', 'Full model, known'],
                   [sample_split, reducedUF, reducedKF, spacings],
                   [hypotheses, hypotheses_full, hypotheses_full, hypotheses_full])])

In [ ]:
make_table(table)
tex_table = r'''
\begin{tabular}[ccccccc] \hline
%s
\end{tabular}
''' % '\n'.join([(' & '.join([str(s) for s in row]) + r'\\ \hline') for row in table])
print tex_table

Mixture


In [ ]:
%%R -i reducedU,reducedK,covtest,spacings,sample_split,reducedKF,reducedUF,first_null,first_null_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
   plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)
}

In [ ]:
%%R
par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   #plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   #plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
   #plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)
}

In [ ]:
%%R
pdf('model5sparse.pdf')
par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   #plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   #plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
   #plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)
}
dev.off()

Null p-values


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i >= first_null+1)
   if (sum(null_Ps) > 0) {
      plot(ecdf(spacings[null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i >= first_null_full+1)
   if (sum(null_Ps) > 0) {
       plot(ecdf(reducedKF[null_Ps,i]), col='cyan', add=TRUE)
       plot(ecdf(reducedUF[null_Ps,i]), col='green', add=TRUE)
   }
}

Non-null p-values


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i >= first_null+1)
   any_null=FALSE
    if (sum(!null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[!null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[!null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[!null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[!null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[!null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i >= first_null_full+1)
   if (sum(!null_Ps) > 0) {
       plot(ecdf(reducedKF[!null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[!null_Ps,i]), col='green', add=TRUE)
    }
}

Last step


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i == first_null)
   any_null=FALSE
    if (sum(null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i == first_null_full)
   if (sum(null_Ps) > 0) {
       plot(ecdf(reducedKF[null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[null_Ps,i]), col='green', add=TRUE)
    }
}

Discoveries (when we had a true variable added to some model)


In [ ]:
%%R  -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   alt_Ps = (hypotheses[,i] == 1)
   if (sum(alt_Ps) > 0) {
      plot(ecdf(spacings[alt_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[alt_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[alt_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[alt_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[alt_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   alt_Ps = (hypotheses_full[,i] == 1)
   if (sum(alt_Ps) > 0) {
       plot(ecdf(reducedKF[alt_Ps,i]), col='cyan', add=TRUE)
       plot(ecdf(reducedUF[alt_Ps,i]), col='green', add=TRUE)
   }
}

5-sparse


In [ ]:
sparsity = 5
reducedK = np.load('reduced_split_known%d.npy' % sparsity)
reducedU = np.load('reduced_split_unknown%d.npy' % sparsity)
reducedKF = np.load('reduced_splitfull_known%d.npy' % sparsity)
reducedUF = np.load('reduced_splitfull_unknown%d.npy' % sparsity)
covtest = np.load('covtest_split%d.npy' % sparsity)
spacings = np.load('spacings_split%d.npy' % sparsity)
sample_split = np.load('split%d.npy' % sparsity)
hypotheses = np.load('hypotheses_split_%d.npy' % sparsity)
hypotheses_full = np.load('hypotheses_splitfull_%d.npy' % sparsity)
first_null = when_null(hypotheses, sparsity=sparsity)
first_null_full = when_null(hypotheses_full, sparsity=sparsity)

Strong stop


In [ ]:
make_table([['Method', 'Steps taken', 'True variables discovered', 'varFDP', 'Steps to find model', 'FWER', 'modelFDP']]
            + [(name,) + summary(data, hyps, sparsity=sparsity, rule='strong') for name, data, hyps in 
               zip(['Sample splitting', 'Reduced full data, unknown', 'Reduced full data, known', 'Full model, known'],
                   [sample_split, reducedUF, reducedKF, spacings],
                   [hypotheses, hypotheses_full, hypotheses_full, hypotheses_full])])

Forward stop


In [ ]:
make_table([['Method', 'Steps taken', 'True variables discovered', 'varFDP', 'Steps to find model', 'FWER', 'modelFDP']]
            + [(name,) + summary(data, hyps, sparsity=sparsity, rule='forward') for name, data, hyps in 
               zip(['Sample splitting', 'Reduced full data, unknown', 'Reduced full data, known', 'Full model, known'],
                   [sample_split, reducedUF, reducedKF, spacings],
                   [hypotheses, hypotheses_full, hypotheses_full, hypotheses_full])])

Null p-values


In [ ]:
%%R -i reducedU,reducedK,covtest,spacings,sample_split,reducedKF,reducedUF,first_null,first_null_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   plot(ecdf(spacings[,i]), col='orange', main=paste('ECDF Step', i))
   plot(ecdf(reducedK[,i]), col='yellow', add=TRUE)
   plot(ecdf(reducedKF[,i]), col='cyan', add=TRUE)
   plot(ecdf(reducedU[,i]), col='blue', add=TRUE, lty=2)
   plot(ecdf(reducedUF[,i]), col='green', add=TRUE)
   plot(ecdf(covtest[,i]), col='red', add=TRUE)
   plot(ecdf(sample_split[,i]), col='purple', add=TRUE)
}

Null p-values


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i >= first_null+1)
   any_null=FALSE
    if (sum(null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i >= first_null_full+1)
   if (sum(null_Ps) > 0) {
       plot(ecdf(reducedKF[null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[null_Ps,i]), col='green', add=TRUE)
    }
}

Non-null p-values


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i >= first_null+1)
   any_null=FALSE
    if (sum(!null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[!null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[!null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[!null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[!null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[!null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i >= first_null_full+1)
   if (sum(!null_Ps) > 0) {
       plot(ecdf(reducedKF[!null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[!null_Ps,i]), col='green', add=TRUE)
    }
}

Last step


In [ ]:
%%R -i hypotheses,hypotheses_full -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   null_Ps = (i == first_null)
   any_null=FALSE
    if (sum(null_Ps) > 0) {
      any_null = TRUE
      plot(ecdf(spacings[null_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[null_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[null_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[null_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[null_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   null_Ps = (i == first_null_full)
   if (sum(null_Ps) > 0) {
       plot(ecdf(reducedKF[null_Ps,i]), col='cyan', add=any_null, main=paste('ECDF Step', i))
       plot(ecdf(reducedUF[null_Ps,i]), col='green', add=TRUE)
    }
}

Discoveries (when we had a true variable added to some model)


In [ ]:
%%R  -h 800 -w 800
par(mfrow=c(3,3))
for (i in 1:9) {
   alt_Ps = (hypotheses[,i] == 1)
   if (sum(alt_Ps) > 0) {
      plot(ecdf(spacings[alt_Ps,i]), col='orange', main=paste('ECDF Step', i))
      plot(ecdf(reducedK[alt_Ps,i]), col='yellow', add=TRUE)
      plot(ecdf(covtest[alt_Ps,i]), col='red', add=TRUE)
      plot(ecdf(sample_split[alt_Ps,i]), col='purple', add=TRUE)   
      plot(ecdf(reducedU[alt_Ps,i]), col='blue', add=TRUE, lty=2)
   }
   alt_Ps = (hypotheses_full[,i] == 1)
   if (sum(alt_Ps) > 0) {
       plot(ecdf(reducedKF[alt_Ps,i]), col='cyan', add=TRUE)
       plot(ecdf(reducedUF[alt_Ps,i]), col='green', add=TRUE)
   }
}

In [ ]:


In [ ]: