In [42]:
%load_ext rmagic
%pylab inline
import numpy as np


The rmagic extension is already loaded. To reload it, use:
  %reload_ext rmagic
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['grid']
`%matplotlib` prevents importing * from pylab and numpy

In [43]:
%%R
library(RMTstat)
###########################################################
# function: TW
# --------------
# Tracy-Widom test at step k
# returns the probability of P(TW > t) where the t is the observed value
# singulars: singular values of Y'Y/N
# N: nrow of Y
# p: ncol of Y
# stepK: the step k of in the sequential test.
# sig: sigma square value. if sigma^2 is known, plug it in. Otherwise, use estSig function to estimate it.

TW = function(singulars, N, p, stepK, sig){
	signp = (sqrt(N-0.5) + sqrt(p-stepK-0.5))*(1/sqrt(N-0.5) + 1/sqrt(p-stepK-0.5))^(1/3)/N
	centEig = ( singulars[stepK]/sig - (sqrt(N-0.5) + sqrt(p-stepK-0.5))^2/N )/signp
	return(1-ptw(centEig))
}

In [44]:
def TW(eigenvals, n, p, sig):
    """
    Step 1 of the sequential PCA algorithm using TW approximation.
    """
    %R -i eigenvals,n,p,sig
    V = %R TW(eigenvals, n, p, 1, sig)
    return V[0]

In [45]:
n, p, sigma = 50, 10, 2
X = np.random.standard_normal((50,10))*sigma
U, D, V = np.linalg.svd(X)
TW(D**2/n,n,p,sig**2)


Out[45]:
0.71815010033446947

Null behavior


In [ ]:
from selection.pca import pvalue
from statsmodels.distributions import ECDF

P_exact = []
P_TW = []
nsim = 2000

for _ in range(nsim):
    X = np.random.standard_normal((n,p)) * sigma
    U, D, V = np.linalg.svd(X)
    P_exact.append(pvalue(X, sigma))
    P_TW.append(TW(D**2/n,n,p,sigma**2))

In [ ]:
grid = np.linspace(0,1,101)
null_fig = plt.figure(figsize=(8,8))
null_ax = null_fig.gca()
null_ax.plot(grid, ECDF(P_exact)(grid), label='exact')
null_ax.plot(grid, ECDF(P_TW)(grid), label='Tracy-Widom')
null_ax.legend(loc='lower right')

In [47]: