In [ ]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Clustering


In [ ]:
from sklearn.datasets import make_blobs
# Draw a synthetic blob dataset; the fixed random_state makes the draw repeatable.
# y is used further down as the reference labelling for the clustering metrics.
X, y = make_blobs(random_state=42)
# Last expression of the cell: show the dimensions of X.
X.shape

In [ ]:
# Plot the first column of X against the second, without any colouring.
plt.scatter(X[:, 0], X[:, 1])

In [ ]:
from sklearn.cluster import KMeans
# KMeans starts from randomly chosen centroids, so seed it: otherwise the
# cluster ids (and every score and plot derived from them) can change on each
# Restart & Run All.
# n_init is spelled out because its default differs between scikit-learn
# releases, which would otherwise make results version-dependent.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

In [ ]:
# Fit the 3-cluster model on X only; y is not passed to the estimator.
kmeans.fit(X)

In [ ]:
# Ask the fitted model for the cluster index it assigns to every sample in X.
cluster_labels = kmeans.predict(X)

In [ ]:
# Display the cluster index assigned to each sample.
cluster_labels

In [ ]:
# Same scatter of X, now coloured by the KMeans cluster assignment.
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels)

In [ ]:
# Display the labels returned by make_blobs, for comparison with cluster_labels.
y

In [ ]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [ ]:
# Element-wise agreement between y and cluster_labels.
# NOTE(review): the cluster ids chosen by KMeans presumably need not coincide
# with the ids used in y, so read this number with care and compare it with
# adjusted_rand_score further down.
accuracy_score(y, cluster_labels)

In [ ]:
# Cross-tabulate y (first argument) against cluster_labels (second argument).
confusion_matrix(y, cluster_labels)

In [ ]:
from sklearn.metrics import adjusted_rand_score

In [ ]:
# Score the agreement between y and cluster_labels with the adjusted Rand index.
adjusted_rand_score(y, cluster_labels)

KMeans can generalize, SpectralClustering cannot

KMeans has a predict method, so a fitted model can assign cluster labels to points it has not seen before.


In [ ]:
# Draw 10 new 2-D points uniformly between the smallest and the largest value
# found anywhere in X. A seeded generator keeps the points (and the plot that
# uses them) identical on every Restart & Run All.
X_more = np.random.RandomState(42).uniform(X.min(), X.max(), size=(10, 2))
# Label the new points with the already fitted KMeans model.
more_cluster_labels = kmeans.predict(X_more)
# Only the last expression of a cell is displayed, so show the new points here.
X_more

In [ ]:
# Original samples, coloured by cluster_labels.
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels)
# New points drawn as crosses, coloured by the labels KMeans predicted for them.
plt.scatter(X_more[:, 0], X_more[:, 1], marker="x", c=more_cluster_labels)

In [ ]:
from sklearn.cluster import SpectralClustering
# Seed the estimator so its randomised steps give the same labels on every
# Restart & Run All; n_clusters and gamma are unchanged.
spectral_clustering = SpectralClustering(n_clusters=3, gamma=.1, random_state=42)

In [ ]:
# Fit on X; the resulting assignment is read from labels_ in a later cell.
spectral_clustering.fit(X)

In [ ]:
# SpectralClustering does not provide a predict method, so this call is
# expected to fail. Catch the error and show it, so the failure stays visible
# as a demonstration without stopping Restart & Run All at this cell.
try:
    cluster_labels = spectral_clustering.predict(X)
except AttributeError as exc:
    print("AttributeError: %s" % exc)

In [ ]:
# Display the labels held by the fitted estimator for the samples in X.
spectral_clustering.labels_

In [ ]:
# Fit on X and get the cluster index of each sample in one call.
# NOTE: this rebinds cluster_labels, replacing the KMeans assignment.
cluster_labels = spectral_clustering.fit_predict(X)

In [ ]:
# Scatter of X coloured by the current cluster_labels (SpectralClustering).
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels)

There is no way to apply spectral_clustering to X_more, because SpectralClustering has no predict method.

An overview of clustering algorithms

A less trivial example


In [ ]:
from sklearn.datasets import load_digits
# Load the digits dataset shipped with scikit-learn.
digits = load_digits()

# NOTE: this rebinds X and y, replacing the blob data used in the earlier cells.
X, y = digits.data, digits.target

In [ ]:
# Ten clusters, matching the 2x5 grid of centres plotted further down.
# Seeded so the adjusted Rand score and the centre images are the same on
# every Restart & Run All; n_init is explicit because its default differs
# between scikit-learn releases.
# NOTE: this rebinds kmeans, replacing the 3-cluster model fitted on the blobs.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)
kmeans.fit(X)

In [ ]:
# Adjusted Rand index between digits.target (y) and the KMeans assignment of X.
adjusted_rand_score(y, kmeans.predict(X))

In [ ]:
# Draw each learned cluster centre as an 8x8 greyscale image on a 2x5 grid.
# The figure handle is bound to `fig` rather than `_`: assigning `_` at
# notebook top level shadows IPython's last-output variable.
fig, axes = plt.subplots(2, 5)
for ax, center in zip(axes.ravel(), kmeans.cluster_centers_):
    # Each centre is a flat vector; reshape it to 8x8 before drawing.
    ax.matshow(center.reshape(8, 8), cmap=plt.cm.gray)
    # Tick marks carry no meaning for an image tile, so remove them.
    ax.set_xticks(())
    ax.set_yticks(())

In [ ]: