notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [4]:

    
from sklearn import datasets
from sklearn import cluster
import matplotlib.pyplot as plt



In [5]:

    
blobs, ground_truth = datasets.make_blobs(1000, centers=3,
                                         cluster_std=1.75)



In [8]:

    
f, ax = plt.subplots(figsize=(10,7.5))
colors = ['r', 'g', 'b']
for i in range(3):
    p = blobs[ground_truth == i]
    ax.scatter(p[:,0], p[:,1], c=colors[i],
               label = 'Cluster {}'.format(i))
ax.set_title('Cluster with Ground Truth')
ax.legend(loc='best')









    Out[8]:





<matplotlib.legend.Legend at 0x10ec820d0>



In [9]:

    
kmeans = cluster.KMeans(n_clusters=3)
kmeans.fit(blobs)









    Out[9]:





KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,
    n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,
    verbose=0)



In [10]:

    
kmeans.cluster_centers_









    Out[10]:





array([[-0.59589408, -7.76309105],
       [ 4.89658628,  7.42787561],
       [-7.6649924 , -4.18354287]])



In [11]:

    
f, ax = plt.subplots(figsize=(10, 7))
for i in range(3):
    p = blobs[ground_truth==i]
    ax.scatter(p[:,0], p[:,1], c=colors[i],
              label='Cluster {}'.format(i))
ax.scatter(kmeans.cluster_centers_[:, 0],
          kmeans.cluster_centers_[:, 1], s=100, color='black',
          label='Centers', marker='*')
ax.set_title('Clusters with Ground Truth Centers')
ax.legend(loc='best')









    Out[11]:





<matplotlib.legend.Legend at 0x10eed81d0>



In [12]:

    
for i in range(3):
    print(kmeans.labels_ == ground_truth)[ground_truth == i].astype(int).mean()









    



0.988023952096
1.0
0.990990990991



In [13]:

    
new_ground_truth = ground_truth.copy()
new_ground_truth[ground_truth == 0] = 2
new_ground_truth[ground_truth == 2] = 0



In [15]:

    
for i in range(3):
    print(kmeans.labels_ == new_ground_truth)[ground_truth == i].astype(int).mean()









    



0.0119760479042
1.0
0.00900900900901



In [ ]: