In [1]:
# (p. 213) Principal Components Analysis
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

from sklearn.datasets import make_moons

X, y = make_moons(n_samples=5000, noise=0.2)

# generate test and training sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2)

# visualize data

plt.scatter(X[:,0], X[:,1], s=5, c=y)
plt.title("Moons Dataset")
plt.show()



In [10]:
import numpy as np

# (p. 213) Singular Value Decomposition
# Subtract the per-feature mean so the decomposition is done on centered data.
X_centered = X - X.mean(axis=0)

# full_matrices=False requests the reduced SVD. The default (True) builds a
# square U with one row and one column per sample, which is large for this
# dataset and is not used anywhere below; s and V are unaffected here.
# NOTE: np.linalg.svd returns the right singular vectors already transposed,
# so `V` actually holds V^T (each row is one principal direction).
U, s, V = np.linalg.svd(X_centered, full_matrices=False)

# (p. 214) Projecting down to d dimensions
# Columns of V.T are the principal directions; keep the first d = 2 of them
# and project the centered data onto that basis.
W2 = V.T[:, :2]
X2D = X_centered.dot(W2)

In [6]:
# Same, but use sklearn instead:
# PCA is given the raw X rather than a pre-centered copy; per the scikit-learn
# documentation the estimator centers its input itself before decomposing it.
from sklearn.decomposition import PCA

# Keep the first two principal components.
pca = PCA(n_components=2)
# Fit and project in a single call. This rebinds X2D, replacing any value it
# held before. NOTE(review): component signs may differ from a hand-rolled SVD
# projection -- confirm before comparing the two results element-wise.
X2D = pca.fit_transform(X)

In [ ]: