In [ ]:

    
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Clustering



In [ ]:

    
from sklearn.datasets import make_blobs
# Generate a synthetic dataset of point "blobs": X holds the coordinates and
# y the blob each point was drawn from. random_state=42 pins the random draw
# so the same data is produced on every run.
X, y = make_blobs(random_state=42)
# Last expression of the cell: displays the array dimensions of X.
X.shape



In [ ]:

    
# Plot the raw, unlabeled points: first column of X on the x-axis, second
# column on the y-axis. Uses the explicit Figure/Axes interface.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1])



In [ ]:

    
from sklearn.cluster import KMeans

# k-means starts from randomly chosen centroids, so the cluster numbering
# (and every score computed from it in later cells) can change between runs.
# Pinning random_state makes the notebook reproducible under
# "Restart Kernel -> Run All". n_init is given explicitly because its default
# value differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)



In [ ]:

    
# Learn the cluster centroids from X. Clustering is unsupervised, so y is not
# passed. As the last expression, the fitted estimator is displayed.
kmeans.fit(X)



In [ ]:

    
# Ask the fitted model for the cluster index of every sample in X.
cluster_labels = kmeans.predict(X)



In [ ]:

    
# Display the predicted cluster index for each sample.
cluster_labels



In [ ]:

    
# Same scatter plot as before, now coloured by the cluster each point was
# assigned to.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=cluster_labels)



In [ ]:

    
# Display the ground-truth blob membership returned by make_blobs, for
# comparison with the predicted cluster_labels.
y



In [ ]:

    
from sklearn.metrics import accuracy_score, confusion_matrix



In [ ]:

    
# Compare cluster ids with the true labels as if they were class predictions.
# NOTE: cluster ids are arbitrary integers (cluster 0 need not correspond to
# blob 0), so this number is only meaningful up to a relabelling of the
# clusters -- a perfect clustering can still score poorly here.
accuracy_score(y, cluster_labels)



In [ ]:

    
# Cross-tabulate true labels (rows) against cluster ids (columns). A good
# clustering has one dominant entry per row, though not necessarily on the
# diagonal, because the cluster numbering is arbitrary.
confusion_matrix(y, cluster_labels)



In [ ]:

    
from sklearn.metrics import adjusted_rand_score



In [ ]:

    
# Adjusted Rand index: scores the agreement between two labelings while
# ignoring how the clusters happen to be numbered, so it is an appropriate
# way to compare cluster_labels against y.
adjusted_rand_score(y, cluster_labels)

KMeans can generalize, SpectralClustering cannot

KMeans has a `predict` method, so a fitted model can assign cluster labels to new, unseen points.



In [ ]:

    
# Draw 10 new 2-D points uniformly from the overall value range of X.
# A seeded generator is used (instead of the unseeded global np.random state)
# so the same points, predictions and plot are produced on every run.
rng = np.random.RandomState(0)
X_more = rng.uniform(X.min(), X.max(), size=(10, 2))

# KMeans can label points it was not fitted on.
more_cluster_labels = kmeans.predict(X_more)

# Only the last expression of a cell is displayed, so the new points are
# shown here rather than in the middle of the cell.
X_more



In [ ]:

    
# Overlay the new points (drawn as crosses) on the original data; both layers
# are coloured by their assigned cluster.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=cluster_labels)
ax.scatter(X_more[:, 0], X_more[:, 1], marker="x", c=more_cluster_labels)



In [ ]:

    
from sklearn.cluster import SpectralClustering

# Spectral clustering involves randomised steps (by default the final label
# assignment runs k-means on the spectral embedding), so random_state is
# pinned to make the labels reproducible across runs.
spectral_clustering = SpectralClustering(n_clusters=3, gamma=.1, random_state=42)



In [ ]:

    
# Fit the spectral clustering model on X. As the last expression, the fitted
# estimator is displayed.
spectral_clustering.fit(X)



In [ ]:

    
# SpectralClustering has no predict method, so this call raises
# AttributeError. The failure is the point of the demonstration, so it is
# caught and displayed instead of aborting "Run All". cluster_labels is left
# unchanged when the call fails.
try:
    cluster_labels = spectral_clustering.predict(X)
except AttributeError as exc:
    print("AttributeError:", exc)



In [ ]:

    
# The cluster index of each sample seen during fit is stored on the fitted
# estimator as labels_.
spectral_clustering.labels_



In [ ]:

    
# fit_predict fits the model and returns the labels for that same data in one
# call. Note this rebinds cluster_labels, which previously held the KMeans
# labels.
cluster_labels = spectral_clustering.fit_predict(X)



In [ ]:

    
# Scatter plot of the data coloured by the spectral clustering labels.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=cluster_labels)

There is no way to apply spectral_clustering to X_more.

An overview of clustering algorithms

More on the website: http://scikit-learn.org/stable/modules/clustering.html

A less trivial example



In [ ]:

    
from sklearn.datasets import load_digits

# Load the handwritten-digits dataset and rebind X / y to it:
# X becomes the feature matrix, y the true digit label of each sample.
digits = load_digits()
X = digits.data
y = digits.target



In [ ]:

    
# One cluster per digit class. random_state pins the random centroid
# initialisation so the score and the centroid images in the following cells
# are reproducible; n_init is given explicitly because its default value
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)
kmeans.fit(X)



In [ ]:

    
# Assign every digit sample to a cluster, then measure how well that grouping
# agrees with the true digit labels.
digit_cluster_labels = kmeans.predict(X)
adjusted_rand_score(y, digit_cluster_labels)



In [ ]:

    
# Show each of the 10 learned cluster centres as an 8x8 grayscale image on a
# 2x5 grid, with tick marks removed.
_, axes = plt.subplots(2, 5)
for ax, center in zip(axes.flat, kmeans.cluster_centers_):
    image = center.reshape(8, 8)
    ax.matshow(image, cmap=plt.cm.gray)
    ax.set(xticks=(), yticks=())



In [ ]: