In [27]:
%matplotlib nbagg
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Load the scikit-learn handwritten-digits dataset and hold out a test split.
from sklearn.datasets import load_digits
# `sklearn.cross_validation` was deprecated and later removed from
# scikit-learn; `train_test_split` now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
import numpy as np

# Print small floats as plain decimals instead of scientific notation.
np.set_printoptions(suppress=True)

digits = load_digits()
X, y = digits.data, digits.target  # feature matrix and digit labels

# Fix the seed so the split (and everything derived from it) is reproducible
# under Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
In [3]:
from sklearn.preprocessing import StandardScaler
1) Instantiate the model
In [4]:
# Create the scaler with its default settings; it is fitted in a later cell.
scaler = StandardScaler()
2) Fit using only the data (no labels are needed).
In [5]:
# Fit on the training split only, so the test split does not influence the
# scaling parameters.
scaler.fit(X_train)
Out[5]:
3) Transform the data (call `transform`, not `predict`).
In [6]:
# Apply the fitted scaler to the training split; the result is stored under a
# separate name so the unscaled X_train stays available for comparison.
X_train_scaled = scaler.transform(X_train)
In [7]:
# Shape of the unscaled training data, for comparison with the scaled array.
X_train.shape
Out[7]:
In [8]:
# Shape of the scaled training data, for comparison with the unscaled array.
X_train_scaled.shape
Out[8]:
The transformed version of the data has the mean removed:
In [9]:
# Per-feature mean of the scaled training data (axis=0 reduces over samples).
X_train_scaled.mean(axis=0)
Out[9]:
In [10]:
# Per-feature standard deviation of the scaled training data (axis=0 reduces
# over samples).
# NOTE(review): features that are constant in X_train presumably show 0 here
# rather than 1 -- confirm against the cell output.
X_train_scaled.std(axis=0)
Out[10]:
In [11]:
# Scale the test split with the scaler that was fitted on X_train; the scaler
# is not re-fitted here.
X_test_transformed = scaler.transform(X_test)
0) Import the model
In [12]:
from sklearn.decomposition import PCA
1) Instantiate the model
In [13]:
# Keep only two components so the projection can be drawn as a 2-D scatter plot.
pca = PCA(n_components=2)
2) Fit to the data (here the full dataset `X`, not only the training split)
In [14]:
# Note: PCA is fitted on the full dataset X here, not only on X_train.
pca.fit(X)
Out[14]:
3) Transform to lower-dimensional representation
In [15]:
# Show the shape before the projection ...
print(X.shape)
# ... project the full dataset onto the fitted components ...
X_pca = pca.transform(X)
# ... and display the shape after the projection as the cell output.
X_pca.shape
Out[15]:
In [31]:
# Plot the 2-D PCA projection for the digits 1 and 2 only, one colour per digit.
plt.figure()
i1 = y == 1  # boolean mask selecting the samples labelled 1
i2 = y == 2  # boolean mask selecting the samples labelled 2
plt.scatter(X_pca[i1, 0], X_pca[i1, 1], c='b', label='digit 1')
plt.scatter(X_pca[i2, 0], X_pca[i2, 1], c='r', label='digit 2')
# Label the axes and add a legend so the figure is readable on its own.
plt.xlabel('first principal component')
plt.ylabel('second principal component')
plt.legend()
Out[31]:
In [17]:
from sklearn.manifold import Isomap
# Isomap with its default settings; the scatter plot in a later cell uses the
# first two columns of the resulting embedding.
isomap = Isomap()
In [18]:
# Fit Isomap on the full dataset and obtain the embedded coordinates in one call.
X_isomap = isomap.fit_transform(X)
In [30]:
# Scatter plot of the first two Isomap coordinates, coloured by digit label.
plt.figure()
plt.scatter(X_isomap[:, 0], X_isomap[:, 1], c=y)
# Label the axes and add a colorbar so the colour -> digit mapping is visible.
plt.xlabel('Isomap coordinate 1')
plt.ylabel('Isomap coordinate 2')
plt.colorbar(label='digit')
Out[30]:
In [20]:
# %load solutions/digits_tsne.py
In [21]:
from sklearn.manifold import TSNE
In [22]:
# t-SNE is stochastic; fix the seed so the embedding is reproducible under
# Restart Kernel -> Run All.
tsne = TSNE(random_state=0)
In [23]:
# NOTE(review): the result of this fit is not used by any later cell -- the
# next cell calls `fit_transform(X)`, which fits again on the full dataset.
# This looks like redundant (and potentially slow) work; consider removing it.
tsne.fit(X_train)
In [24]:
# Fit t-SNE on the full dataset and obtain the embedded coordinates in one call.
X_tsne = tsne.fit_transform(X)
In [25]:
# Shape of the t-SNE embedding (samples x embedding dimensions).
X_tsne.shape
Out[25]:
In [32]:
# Scatter plot of the first two t-SNE coordinates, coloured by digit label.
plt.figure()
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
# Label the axes and add a colorbar so the colour -> digit mapping is visible.
plt.xlabel('t-SNE coordinate 1')
plt.ylabel('t-SNE coordinate 2')
plt.colorbar(label='digit')
Out[32]:
In [ ]: