PCA of high dimensional random walks


In [1]:
%pylab inline
import sklearn.decomposition


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Dimensionality of the space in which the random walk takes place
# (number of columns of the trajectory array built below).
n_dims = 10000
# Number of positions recorded along the walk, including the starting point
# (number of rows of the trajectory array built below).
n_steps = 1000

In [3]:
# Simulate a Gaussian random walk of n_steps positions in n_dims dimensions.
# Row 0 is the origin; every later row adds an independent normal step whose
# per-coordinate standard deviation is 1/sqrt(n_dims), so the expected squared
# length of a single step is 1 regardless of n_dims.
#
# A dedicated, seeded generator makes the walk (and everything derived from it)
# reproducible under "Restart Kernel -> Run All" without touching NumPy's
# global random state.
rng = np.random.RandomState(0)

# Draw all increments at once; max(..., 0) keeps n_steps == 0 valid (empty walk).
steps = rng.normal(scale=1/np.sqrt(n_dims), size=(max(n_steps - 1, 0), n_dims))

walk = np.zeros((n_steps, n_dims))
# Position i is the running sum of the first i increments.
walk[1:] = np.cumsum(steps, axis=0)

In [4]:
# PCA estimator constructed with scikit-learn's default arguments
# (nothing is overridden here, in particular no explicit component count).
pca = sklearn.decomposition.PCA()

In [5]:
# Fit the PCA on the trajectory (rows = steps, columns = dimensions) and keep
# the transformed trajectory, i.e. each step expressed in PCA components.
pca_walk = pca.fit_transform(walk)

In [6]:
# Observed fraction of the total variance explained by the first PCA
# component (entry 0 of the fitted model's explained_variance_ratio_).
pca.explained_variance_ratio_[0]


Out[6]:
0.6084576781019368

In [7]:
# Predicted value for the first-component variance ratio observed above:
# 6 / pi^2 (approximately 0.608).
6 / np.pi**2


Out[7]:
0.6079271018540267

In [8]:
# Plot the projection of the trajectory onto the first few PCA components,
# one line per component, against the step index.
#
# The explicit figure/axes interface is used instead of the bare names that
# `%pylab` star-imports, so it is clear where every call comes from.
# At most 5 components are drawn; the count is clamped to the number of
# columns actually present in pca_walk so a short walk does not raise
# IndexError.
n_plotted = min(5, pca_walk.shape[1])

fig, ax = plt.subplots()
for component in range(n_plotted):
    ax.plot(pca_walk[:, component], label='PCA %d' % component)

ax.set_xlabel('Step')
ax.set_ylabel('Distance along component')
ax.set_title('Random walk projected onto its leading PCA components')
# Trailing semicolon suppresses the Legend object's repr in the cell output.
ax.legend(loc='upper right', fontsize=10);