In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [3]:
from sklearn.datasets import make_blobs

# Draw a seeded sample around three centres, then add a quadratic function of
# the first coordinate to the second one, which bends the point cloud.
X, y = make_blobs(centers=3, random_state=42)
X[:, 1] = X[:, 1] + 0.25 * np.square(X[:, 0])

# Plot every point as a black circle. The generating labels in `y` are not
# used for colouring here; plt.scatter(X[:, 0], X[:, 1], 20, y) would show them.
plt.plot(X[:, 0], X[:, 1], 'ok')
Out[3]:
In [7]:
from sklearn.cluster import KMeans, AffinityPropagation, SpectralClustering

# Cluster the blob data into three groups. KMeans(n_clusters=3) or
# AffinityPropagation() can be swapped in here to compare algorithms.
# The seed is pinned (like the seeded make_blobs and KMeans calls in this
# notebook) so that a fresh "Restart & Run All" reproduces the same labels.
cluster = SpectralClustering(n_clusters=3, random_state=42)
labels = cluster.fit_predict(X)

# Print the assigned cluster ids next to the generating labels for comparison.
print('Labels: \n', labels)
print('Data: \n', y)

# Colour each point by the cluster it was assigned to.
plt.scatter(X[:, 0], X[:, 1], s=20, c=labels, edgecolor='none')
Out[7]:
In [45]:
from sklearn.datasets import load_digits

digits = load_digits()
print(len(digits.images))

# Show the first 64 digit images on an 8x8 grid of tick-free axes and write
# each image's target value at data coordinates (0, 7) of its panel.
GRID_SIDE = 8
fig = plt.figure(figsize=(6, 6))
for cell in range(GRID_SIDE * GRID_SIDE):
    ax = fig.add_subplot(GRID_SIDE, GRID_SIDE, cell + 1, xticks=[], yticks=[])
    ax.matshow(digits.images[cell], cmap=plt.cm.binary)
    ax.text(0, 7, str(digits.target[cell]))
In [48]:
# Display the dimensions of the digits feature matrix
# (presumably (n_samples, n_features) -- confirm against the cell output).
digits.data.shape
Out[48]:
In [51]:
# RandomizedPCA was removed from scikit-learn in 0.20. It was imported here but
# never used, and the import alone makes this cell fail on current releases.
# PCA(svd_solver='randomized') is the replacement if that solver is wanted.
from sklearn.decomposition import PCA

# Project the digit features onto two principal components.
pca = PCA(n_components=2)
proj = pca.fit_transform(digits.data)

# One point per sample, coloured by its digit target; the colour bar shows the
# mapping from colour to target value.
plt.scatter(proj[:, 0], proj[:, 1], s=30, c=digits.target, edgecolor='none')
plt.colorbar()
Out[51]:
In [52]:
from sklearn.manifold import Isomap

# Two-component Isomap embedding of the digit features using 5 neighbours.
# This rebinds `proj`, replacing the PCA projection computed earlier.
iso = Isomap(n_components=2, n_neighbors=5)
proj = iso.fit_transform(digits.data)

# NOTE(review): the second embedding coordinate is drawn on the x-axis and the
# first on the y-axis, the reverse of the PCA plot -- confirm this is intended.
embed_x, embed_y = proj[:, 1], proj[:, 0]
plt.scatter(embed_x, embed_y, s=30, c=digits.target, edgecolor='none')
Out[52]:
In [66]:
# Group the digit samples into 10 clusters (seeded for reproducibility).
# KMeans comes from the sklearn.cluster import in the earlier clustering cell.
kmeans = KMeans(n_clusters=10, random_state=42)

# fit() returns the estimator itself, so `labels = kmeans.fit(...)` bound
# `labels` to the KMeans object rather than to cluster assignments.
# fit_predict() fits the same model and returns the per-sample cluster indices.
labels = kmeans.fit_predict(digits.data)

# Draw each of the 10 cluster centres as an 8x8 image on a 2x5 grid of axes.
fig, axs = plt.subplots(2, 5, figsize=(8, 3))
axs = axs.flatten()
for ax, center in zip(axs, kmeans.cluster_centers_):
    ax.imshow(center.reshape(8, 8), cmap=plt.cm.gray_r)
In [22]:
from sklearn.naive_bayes import GaussianNB
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# lives in sklearn.model_selection (available since 0.18).
from sklearn.model_selection import train_test_split

# Hold out part of the digits for evaluation. The split is seeded so the
# metrics computed from `expected` / `predicted` are reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=42
)

# Fit a Gaussian naive Bayes classifier on the training portion.
clf = GaussianNB()
clf.fit(X_train, y_train)

# Predictions on the held-out samples, paired with their true targets.
predicted = clf.predict(X_test)
expected = y_test
In [23]:
from sklearn import metrics

# Text summary comparing the true targets with the classifier's predictions.
report = metrics.classification_report(expected, predicted)
print(report)
In [24]:
# Confusion matrix of true targets (`expected`) against predictions.
# Relies on `metrics` imported in the classification-report cell.
confusion = metrics.confusion_matrix(expected, predicted)
print(confusion)
In [ ]: