In [1]:
import sklearn as sk
import numpy as np
import pylab as pl

In [9]:
def DistSqr(x1, x2):
    """Return the matrix of squared Euclidean distances between two point sets.

    Parameters
    ----------
    x1 : array-like, shape (m, d)
        First set of points, one point per row.
    x2 : array-like, shape (n, d)
        Second set of points, one point per row. Must have the same number
        of columns as ``x1``.

    Returns
    -------
    numpy.ndarray, shape (m, n)
        Entry ``[i, j]`` is ``sum_c (x1[i, c] - x2[j, c]) ** 2`` taken over
        every column ``c``.

    Raises
    ------
    ValueError
        If either input is not 2-D or the column counts differ.
    """
    # Work in floating point so unsigned-integer inputs cannot wrap around
    # when subtracted, and so list-of-lists inputs are accepted too.
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)
    if x1.ndim != 2 or x2.ndim != 2:
        raise ValueError("DistSqr expects two 2-D arrays of shape (m, d) and (n, d)")
    if x1.shape[1] != x2.shape[1]:
        raise ValueError(
            "DistSqr inputs must have the same number of columns, got %d and %d"
            % (x1.shape[1], x2.shape[1])
        )

    m = x1.shape[0]
    n = x2.shape[0]
    d2 = np.zeros((m, n))
    # Accumulate one coordinate at a time: this keeps the temporary at
    # (m, n) instead of (m, n, d) and covers every column, not just two.
    for col in range(x1.shape[1]):
        d = x1[:, col][:, np.newaxis] - x2[:, col][np.newaxis, :]
        d2 = d2 + d * d
    return d2

In [205]:
def kmeans(x, k, tol, itermax):
    """Cluster 2-D points with Lloyd's k-means, starting from random centres.

    The ``k`` initial centres are drawn uniformly at random (global NumPy
    RNG) inside the axis-aligned bounding box of ``x``. Each pass assigns
    every point to its nearest centre (squared distance from ``DistSqr``)
    and then moves each centre to the mean of the points assigned to it.

    Parameters
    ----------
    x : numpy.ndarray, shape (m, 2)
        Points to cluster, one per row. Centres are seeded from columns 0
        and 1 and stored with two coordinates.
    k : int
        Number of clusters.
    tol : float
        Convergence threshold. Iteration stops once the largest absolute
        change in any centre coordinate during a pass is below ``tol``.
    itermax : int
        Maximum number of passes to perform.

    Returns
    -------
    clusters : numpy.ndarray of int, shape (m,)
        Index of the centre each point was assigned to in the last pass
        (an empty list if no pass was performed, i.e. ``itermax <= 0``).
    it : int
        Number of passes actually performed (never more than ``itermax``).
    """
    minimum = x.min(axis=0)
    maximum = x.max(axis=0)
    cx = np.random.uniform(low=minimum[0], high=maximum[0], size=k)
    cy = np.random.uniform(low=minimum[1], high=maximum[1], size=k)
    c = np.array([cx, cy]).T

    it = 0
    err = tol  # guarantees the first pass runs whenever itermax allows it
    clusters = []
    # Strict '<' so that at most `itermax` passes are executed.
    while (it < itermax) and (err >= tol):
        clusters = DistSqr(x, c).argmin(axis=1)

        # Start from the current centres so that a centre which attracted
        # no points keeps its position. Taking the mean of an empty slice
        # would yield NaN, which makes `err` NaN and silently ends the
        # loop with corrupted centres.
        c_new = c.copy()
        for i in range(k):
            members = x[clusters == i, :]
            if members.shape[0] > 0:
                c_new[i, :] = members.mean(axis=0)

        err = np.absolute(c_new - c).max()
        c = c_new
        it = it + 1

    return clusters, it

In [206]:
# Synthetic input: 1000 two-column samples drawn uniformly from [0, 1).
X = np.random.rand(1000,2)

In [207]:
# Number of clusters passed to kmeans.
k = 4

In [208]:
# Convergence threshold for kmeans, compared against the largest absolute
# change in any centre coordinate between passes.
tol = 0.0001

In [209]:
# Iteration cap handed to kmeans as its `itermax` argument.
itermax = 1000

In [210]:
# Run k-means on X: `clusters` holds one cluster index per row of X and
# `it` is the iteration counter returned by kmeans.
clusters,it = kmeans(X, k, tol, itermax)

In [211]:
# Show how many iterations kmeans ran. The parenthesised form prints the
# same text under Python 2 and is also valid Python 3.
print(it)


12

In [212]:
# Scatter the samples (column 0 vs column 1), coloured by the cluster
# index that kmeans assigned to each one.
pl.scatter(X[:,0], X[:,1], c=clusters, s=100)
pl.show()



In [180]:
# Scratch cell: k random x-coordinates within the range of X's column 0,
# the same draw kmeans performs for its initial centres.
cx = np.random.uniform(low=X.min(axis=0)[0], high=X.max(axis=0)[0], size=k)

In [92]:
# Display the sampled x-coordinates.
cx


Out[92]:
array([ 0.10631408,  0.44111524,  0.31212831,  0.57036577])

In [94]:
# Scratch cell: k random y-coordinates within the range of X's column 1,
# matching the y-coordinate draw kmeans performs for its initial centres.
# (Bounds previously came from column 0, i.e. the x-range.)
cy = np.random.uniform(low=X.min(axis=0)[1], high=X.max(axis=0)[1], size=k)

In [95]:
# Display the sampled y-coordinates.
cy


Out[95]:
array([ 0.61168628,  0.15250901,  0.91208587,  0.38593201])

In [ ]: