In [ ]:
# Select matplotlib's nbagg backend for the figures in this notebook.
%matplotlib nbagg
import matplotlib.pyplot as plt
import numpy as np
In [ ]:
from sklearn.datasets import load_digits
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. Fall back to
# the old location only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split
import numpy as np

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Load the digits dataset: X is the feature matrix, y the target labels.
digits = load_digits()
X, y = digits.data, digits.target

# Split into training and test parts using the default split settings.
# No random_state is given, so the partition differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)
In [ ]:
from sklearn.preprocessing import StandardScaler
1) Instantiate the model
In [ ]:
# Create the scaler with its default settings; nothing is learned from data yet.
scaler = StandardScaler()
2) Fit using only the data.
In [ ]:
# Fit the scaler on the training split only; no labels are passed.
scaler.fit(X_train)
3) `transform` the data (not `predict`).
In [ ]:
# Apply the fitted scaler to the training data.
X_train_scaled = scaler.transform(X_train)
In [ ]:
# Shape of the training data before scaling.
X_train.shape
In [ ]:
# Shape of the scaled training data, for comparison with X_train.shape.
X_train_scaled.shape
The transformed version of the data has the mean removed:
In [ ]:
# Per-feature (column-wise) mean of the scaled training data.
X_train_scaled.mean(axis=0)
In [ ]:
# Per-feature (column-wise) standard deviation of the scaled training data.
X_train_scaled.std(axis=0)
In [ ]:
# Scale the test split with the scaler that was fitted on the training split
# (the scaler is not refitted here).
X_test_transformed = scaler.transform(X_test)
0) Import the model
In [ ]:
from sklearn.decomposition import PCA
1) Instantiate the model
In [ ]:
# PCA configured to keep two components.
pca = PCA(n_components=2)
2) Fit to the data (here the full dataset `X`)
In [ ]:
# Fit PCA on the full data matrix X; the labels y are not used.
pca.fit(X)
3) Transform to lower-dimensional representation
In [ ]:
# Show the original shape, project X onto the fitted components, then show
# the shape of the projected data for comparison.
print(X.shape)
X_pca = pca.transform(X)
X_pca.shape
In [ ]:
# New figure: scatter the first two PCA coordinates, colored by the label y.
plt.figure()
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
In [ ]:
# Shape of the fitted components array.
pca.components_.shape
In [ ]:
# Render each of the first two PCA components as an 8x8 grayscale image,
# each in its own matshow figure with a colorbar.
plt.matshow(np.reshape(pca.components_[0], (8, 8)), cmap="gray")
plt.colorbar()
plt.matshow(np.reshape(pca.components_[1], (8, 8)), cmap="gray")
plt.colorbar()
In [ ]:
from sklearn.manifold import Isomap
# Isomap estimator with its default settings.
isomap = Isomap()
In [ ]:
# Fit Isomap on X and compute the embedding in a single step.
X_isomap = isomap.fit_transform(X)
In [ ]:
# Open a new figure first: without it the scatter is drawn onto whichever
# figure is currently active (with the nbagg backend, figures created in
# earlier cells stay live), instead of getting a plot of its own.
plt.figure()
# Scatter the first two Isomap coordinates, colored by the label y.
plt.scatter(X_isomap[:, 0], X_isomap[:, 1], c=y)
In [ ]:
# %load solutions/digits_unsupervised.py
from sklearn.manifold import TSNE
from sklearn.decomposition import NMF

# Compute TSNE embedding
tsne = TSNE()
X_tsne = tsne.fit_transform(X)

# Visualize TSNE results. The figure has to exist before the title is set:
# plt.title() acts on the current axes, so calling it ahead of plt.figure()
# would label the previously active figure instead of this scatter plot.
plt.figure()
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
plt.title("All classes")

# build an NMF factorization of the digits dataset
nmf = NMF(n_components=16).fit(X)

# visualize the components: one 8x8 image per component on a 4x4 grid
fig, axes = plt.subplots(4, 4)
for ax, component in zip(axes.ravel(), nmf.components_):
    ax.imshow(component.reshape(8, 8), cmap="gray", interpolation="nearest")
In [ ]: