In [1]:
import numpy as np
import numpy.random as npr
import pylab as pl
%matplotlib


Using matplotlib backend: MacOSX

In [3]:
from sklearn import datasets
data = datasets.make_swiss_roll(1000,0.1)

In [7]:
X = data[0]

In [8]:
Y = data[1]

In [74]:
X,Y = datasets.make_blobs(100,2)

In [75]:
pl.scatter(X[:,0],X[:,1],c=Y)


Out[75]:
<matplotlib.collections.PathCollection at 0x11445ead0>

In [124]:
clusters = [[x] for x in X]

In [125]:
def d(a,b):
    return np.sqrt(sum((a-b)**2))

def single_linkage(A,B):
    i,j = 0,0
    min_dist = d(A[i],B[j])
    for ix in range(len(A)):
        for jx in range(len(B)):
            if d(A[ix],B[jx]) < min_dist:
                i,j = ix,jx
    return d(A[i],B[j]),i,j

In [126]:
single_linkage(clusters[0],clusters[1])


Out[126]:
(2.0343784692274185, 0, 0)

In [127]:
len(clusters)


Out[127]:
100

In [128]:
def find_closest_clusters(clusters):
    i_min,j_min = 0,1
    SL = []
    min_SL = single_linkage(clusters[i_min],clusters[j_min])[0]
    for i in range(len(clusters)):
        for j in range(1,i):
            SL_ij = single_linkage(clusters[i],clusters[j])[0]
            SL.append(SL_ij)
            if SL_ij < min_SL:
                i_min,j_min = i,j
    return i_min,j_min

In [128]:


In [129]:
i,j = find_closest_clusters(clusters)
i,j


Out[129]:
(99, 90)

In [130]:
c_i,c_j = clusters[i],clusters[j]
c_i,c_j


Out[130]:
([array([-3.74014511,  7.48070377])], [array([-3.567253  ,  6.91940706])])

In [131]:
del(clusters[i])

In [132]:
del(clusters[j])

In [133]:
len(clusters)


Out[133]:
98

In [134]:
c_i


Out[134]:
[array([-3.74014511,  7.48070377])]

In [135]:
c_j


Out[135]:
[array([-3.567253  ,  6.91940706])]

In [137]:
c_ = c_i+ c_j
c_


Out[137]:
[array([-3.74014511,  7.48070377]), array([-3.567253  ,  6.91940706])]

In [138]:
clusters.append(c_)

In [148]:
def agglomerative_clustering(data,target_clust=1):
    clusters = [[x] for x in data]
    while len(clusters) > target_clust:
        i,j = find_closest_clusters(clusters)
        c_ = clusters[i]+clusters[j]
        del(clusters[i])
        del(clusters[j])
        clusters.append(c_)
    return clusters

In [172]:
def d_(a,b):
    return np.sqrt(sum((a-b)**2))

def single_linkage_(A,B):
    i,j = 0,0
    min_dist = d_(A[i],B[j])
    for ix in range(len(A)):
        for jx in range(len(B)):
            if d_(A[ix],B[jx]) < min_dist:
                i,j = ix,jx
    return d_(A[i],B[j]),i,j

def find_closest_clusters_(data,clusters):
    i_min,j_min = 0,1
    SL = []
    min_SL = single_linkage_(data[clusters[i_min]],data[clusters[j_min]])[0]
    for i in range(len(clusters)):
        for j in range(1,i):
            SL_ij,SL_i,SL_j = single_linkage_(data[clusters[i]],data[clusters[j]])
            SL.append(SL_ij)
            if SL_ij < min_SL:
                i_min,j_min = i,j
    return i_min,j_min

def agglomerative_clustering_(data,target_clust=1):
    idx = range(len(data))
    clusters = [[i] for i in idx]
    while len(clusters) > target_clust:
        i,j = find_closest_clusters_(data,clusters)
        c_ = clusters[i]+clusters[j]
        del(clusters[i])
        del(clusters[j])
        clusters.append(c_)
    return clusters

In [173]:
clusters = agglomerative_clustering_(X,10)

In [ ]:


In [ ]:


In [149]:
clusters = agglomerative_clustering(X,10)

In [150]:
clusters


Out[150]:
[[array([-7.81667122,  4.64425115])],
 [array([-9.53195833,  5.73809111])],
 [array([-6.8630112 ,  4.35535586])],
 [array([-9.02497314,  4.4362515 ])],
 [array([-7.84904371,  4.12112005])],
 [array([-10.46789657,   5.22821599])],
 [array([-9.78643799,  5.16267652])],
 [array([-3.74014511,  7.48070377]),
  array([-3.567253  ,  6.91940706]),
  array([-2.0473645 ,  7.93147851]),
  array([-1.72203608,  8.65439761]),
  array([-1.40647493,  8.42269607]),
  array([-2.51181048,  6.74938617]),
  array([-1.62254272,  5.09110176]),
  array([-0.53839565,  6.50499882]),
  array([-1.74794027,  5.43818583]),
  array([-2.88253719,  9.10925395]),
  array([-0.74954098,  6.26981377]),
  array([ 0.50581463,  7.4535685 ]),
  array([-3.8118264 ,  7.32607826]),
  array([-2.43665756,  6.99432005]),
  array([-2.50474253,  5.94667189]),
  array([-1.17785978,  4.99782209]),
  array([-2.84488308,  4.00825109]),
  array([-5.31338383,  7.0707774 ]),
  array([-7.11446948,  7.12241725]),
  array([-7.0714006 ,  5.14148934]),
  array([-7.63684144,  3.71370247]),
  array([-8.3492588 ,  3.92972439]),
  array([-7.26559834,  3.48021555]),
  array([-7.33950546,  4.35919962]),
  array([-8.4526524 ,  5.29457505]),
  array([-8.52753773,  5.42311219]),
  array([-7.84285054,  4.05374317]),
  array([-8.03099279,  4.88136692]),
  array([-9.0508696 ,  5.99207694]),
  array([-8.82048242,  4.43845212]),
  array([-7.72522315,  5.48074827]),
  array([-8.68940317,  5.86831811]),
  array([-8.17768576,  5.05817862]),
  array([-6.82411307,  3.59344341]),
  array([-6.99998585,  5.2441237 ]),
  array([-4.38487598,  6.26903222]),
  array([-2.51650154,  8.11684546]),
  array([-3.34031219,  7.70486968]),
  array([-3.17960454,  8.24238934]),
  array([-3.51497733,  8.41483491]),
  array([-3.23088154,  7.57367254]),
  array([-3.69478398,  7.67164451]),
  array([-2.64163656,  7.46616128]),
  array([-1.52324795,  6.36757053]),
  array([-2.51865615,  7.8151723 ]),
  array([-3.40637605,  7.15629526]),
  array([-2.14752263,  5.83614684]),
  array([-1.43551119,  6.2081126 ]),
  array([-2.23801103,  7.46839843]),
  array([-3.90805764,  5.2019522 ])],
 [array([-5.15356045, -7.85696539]),
  array([-5.33492486, -6.41191901]),
  array([-7.15818057, -7.52581599]),
  array([-5.84413817, -7.18474382]),
  array([-5.61731159, -7.53695768]),
  array([-4.98948042, -6.54956041]),
  array([-6.50307701, -6.3614486 ]),
  array([-5.90602245, -8.13347358]),
  array([-6.09795349, -6.05503997]),
  array([-5.65700828, -7.17268488]),
  array([-6.2644111 , -5.98739449]),
  array([-5.54937335, -7.22790985]),
  array([-6.4144993 , -7.36347657]),
  array([-6.43121594, -7.94474451]),
  array([-5.21982193, -7.24062878]),
  array([-6.25677519, -7.6429213 ]),
  array([-6.92580852, -7.48667229]),
  array([-4.31971953, -6.45057788]),
  array([-5.98005893, -7.80199593]),
  array([-4.17603772, -7.90476871]),
  array([-6.10733695, -6.86199781]),
  array([-6.74417994, -6.75963205]),
  array([-4.67138355, -6.38718849]),
  array([-4.45443568, -5.5627554 ]),
  array([-5.44410342, -6.31949992]),
  array([-6.23945828, -7.03899564]),
  array([-5.18725394, -8.21543606]),
  array([-6.29116073, -7.1674039 ]),
  array([-5.29046659, -9.09998018]),
  array([-7.06470083, -9.69648992]),
  array([-6.04764554, -9.07770229]),
  array([-5.7839467 , -7.02433741]),
  array([-4.88668286, -7.2249585 ])],
 [array([-8.1432145 ,  3.29140405]),
  array([-9.3537905 ,  4.66285705]),
  array([-7.58024363,  3.49107728]),
  array([-7.56377374,  3.89835036]),
  array([-6.74249638,  5.54281561]),
  array([-6.36056211,  4.99202763]),
  array([-7.8410894 ,  6.03265188]),
  array([-8.65009112,  5.43058466]),
  array([-7.9128039 ,  6.38949575]),
  array([-8.02884397,  4.71993948])]]

In [ ]: