In [1]:
# (p. 213) Principal Components Analysis
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=5000, noise=0.2)
# generate test and training sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2)
# visualize data
plt.scatter(X[:,0], X[:,1], s=5, c=y)
plt.title("Moons Dataset")
plt.show()
In [10]:
import numpy as np
# (p. 213) Singular Value Decomposition
# Center each feature on zero before decomposing; the projection below is
# applied to the centered data as well.
X_centered = X - X.mean(axis=0)
# full_matrices=False requests the economy-size SVD. With the default
# (full_matrices=True) U is a dense (n_samples, n_samples) matrix, which is
# never used here and is very large for a dataset of thousands of samples.
# s and V are unaffected by this flag when n_samples >= n_features.
# NOTE: np.linalg.svd returns the right singular vectors already transposed,
# so `V` actually holds V^T: each *row* is one principal component.
U, s, V = np.linalg.svd(X_centered, full_matrices=False)
# (p. 214) Projecting down to d dimensions
# Transposing puts the components in columns; keep the first d = 2 of them.
W2 = V.T[:, :2]
# Project the centered samples onto the selected components.
X2D = X_centered.dot(W2)
In [6]:
# The same 2-D projection, delegated to scikit-learn instead of a manual SVD.
from sklearn.decomposition import PCA

# PCA centers the input itself, so the raw (uncentered) X is passed in.
pca = PCA(n_components=2)
# Fit the components and project X in a single call.
# Note: this rebinds X2D, replacing any value assigned to it earlier.
X2D = pca.fit_transform(X)
In [ ]: