PCA of high dimensional random walks



In [1]:

    
%pylab inline
import sklearn.decomposition









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
# Walk configuration: number of coordinates of the ambient space and
# number of points recorded along the trajectory.
n_dims, n_steps = 10000, 1000



In [3]:

    
# Build an (n_steps, n_dims) random walk that starts at the origin.
# Every step is drawn i.i.d. from a Gaussian whose per-coordinate scale is
# 1/sqrt(n_dims), so the expected squared length of one step is 1
# regardless of the dimensionality.
# A seeded Generator keeps the walk, and every number derived from it,
# reproducible under Restart & Run All.
rng = np.random.default_rng(seed=0)
steps = rng.normal(scale=1 / np.sqrt(n_dims), size=(n_steps - 1, n_dims))

# Row i of the walk is the running sum of the first i steps; row 0 stays at
# zero. A single cumulative sum replaces the Python-level loop over steps.
walk = np.zeros((n_steps, n_dims))
walk[1:] = np.cumsum(steps, axis=0)



In [4]:

    
# Untruncated PCA: n_components=None (the library default) keeps every
# component instead of cutting the decomposition off at a fixed rank.
pca = sklearn.decomposition.PCA(n_components=None)



In [5]:

    
# Fit the PCA model on the walk (rows = steps, columns = coordinates) and
# project every step onto the fitted components, so that pca_walk[:, k] is
# the trajectory's coordinate along component k.
pca_walk = pca.fit_transform(walk)



In [6]:

    
# Observed fraction of the total variance captured by the first PCA component
explained_ratios = pca.explained_variance_ratio_
explained_ratios[0]









    Out[6]:





0.6084576781019368



In [7]:

    
# Predicted value of the ratio above: 6 / pi^2, i.e. 1 / sum_{k>=1} 1/k^2
predicted_ratio = 6 / np.pi**2
predicted_ratio









    Out[7]:





0.6079271018540267



In [8]:

    
# Trajectory projected onto each of the five leading PCA components,
# drawn on an explicit Axes rather than through the implicit pyplot state.
fig, ax = plt.subplots()
for component in range(5):
    ax.plot(pca_walk[:, component], label='PCA %d' % component)

ax.set_xlabel('Step')
ax.set_ylabel('Distance along component')
ax.legend(loc='upper right', fontsize=10);