In [42]:
%load_ext rmagic
%pylab inline
import numpy as np
In [43]:
%%R
library(RMTstat)
###########################################################
# function: TW
# --------------
# Tracy-Widom test at step k
# returns the probability of P(TW > t) where the t is the observed value
# singulars: singular values of Y'Y/N
# N: nrow of Y
# p: ncol of Y
# stepK: the step k of in the sequential test.
# sig: sigma square value. if sigma^2 is known, plug it in. Otherwise, use estSig function to estimate it.
TW = function(singulars, N, p, stepK, sig){
signp = (sqrt(N-0.5) + sqrt(p-stepK-0.5))*(1/sqrt(N-0.5) + 1/sqrt(p-stepK-0.5))^(1/3)/N
centEig = ( singulars[stepK]/sig - (sqrt(N-0.5) + sqrt(p-stepK-0.5))^2/N )/signp
return(1-ptw(centEig))
}
In [44]:
def TW(eigenvals, n, p, sig):
"""
Step 1 of the sequential PCA algorithm using TW approximation.
"""
%R -i eigenvals,n,p,sig
V = %R TW(eigenvals, n, p, 1, sig)
return V[0]
In [45]:
n, p, sigma = 50, 10, 2
X = np.random.standard_normal((50,10))*sigma
U, D, V = np.linalg.svd(X)
TW(D**2/n,n,p,sig**2)
Out[45]:
In [ ]:
from selection.pca import pvalue
from statsmodels.distributions import ECDF
P_exact = []
P_TW = []
nsim = 2000
for _ in range(nsim):
X = np.random.standard_normal((n,p)) * sigma
U, D, V = np.linalg.svd(X)
P_exact.append(pvalue(X, sigma))
P_TW.append(TW(D**2/n,n,p,sigma**2))
In [ ]:
grid = np.linspace(0,1,101)
null_fig = plt.figure(figsize=(8,8))
null_ax = null_fig.gca()
null_ax.plot(grid, ECDF(P_exact)(grid), label='exact')
null_ax.plot(grid, ECDF(P_TW)(grid), label='Tracy-Widom')
null_ax.legend(loc='lower right')
In [47]: