In [1]:
import sklearn as sk
import numpy as np
import pylab as pl
In [9]:
def DistSqr(x1, x2):
    """Return the matrix of squared Euclidean distances between two point sets.

    Parameters
    ----------
    x1 : array-like, shape (m, d)
        First set of points, one point per row.
    x2 : array-like, shape (n, d)
        Second set of points, one point per row.

    Returns
    -------
    d2 : ndarray of float, shape (m, n)
        ``d2[i, j]`` is the squared distance between ``x1[i]`` and ``x2[j]``,
        summed over all ``d`` columns.

    Raises
    ------
    ValueError
        If ``x1`` and ``x2`` do not have the same number of columns.
    """
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    if x1.shape[1] != x2.shape[1]:
        raise ValueError(
            "x1 and x2 must have the same number of columns, got %d and %d"
            % (x1.shape[1], x2.shape[1])
        )
    m = x1.shape[0]
    n = x2.shape[0]
    d2 = np.zeros((m, n))
    # Accumulate one coordinate at a time so the temporary stays (m, n)
    # instead of (m, n, d); every column contributes, not just the first two.
    for i in range(x1.shape[1]):
        # Column vector minus row vector broadcasts to all (m, n) pairs.
        d = x1[:, i][:, np.newaxis] - x2[:, i][np.newaxis, :]
        d2 = d2 + d * d
    return d2
In [205]:
def kmeans(x, k, tol, itermax):
    """Cluster the rows of ``x`` into ``k`` groups with Lloyd-style k-means.

    Centroids are initialised by drawing, for every column of ``x``, ``k``
    values uniformly between that column's minimum and maximum. Each
    iteration assigns every point to its nearest centroid (via ``DistSqr``)
    and then moves each centroid to the mean of its assigned points.

    Parameters
    ----------
    x : array-like, shape (n_points, n_dims)
        Data to cluster, one point per row.
    k : int
        Number of clusters.
    tol : float
        Iteration stops once the largest absolute change of any centroid
        coordinate during one iteration drops below ``tol``.
    itermax : int
        Maximum number of iterations to perform.

    Returns
    -------
    clusters : ndarray of int, shape (n_points,)
        Cluster index of every point, as assigned at the start of the last
        iteration (before that iteration's centroid update). An empty list
        is returned when no iteration ran (``itermax <= 0``).
    it : int
        Number of iterations actually performed (at most ``itermax``).
    """
    x = np.asarray(x)
    minimum = x.min(axis=0)
    maximum = x.max(axis=0)
    # One batch of k draws per column, in column order, so the random stream
    # is consumed exactly as a separate cx / cy draw would for 2-D data.
    c = np.array([
        np.random.uniform(low=minimum[j], high=maximum[j], size=k)
        for j in range(x.shape[1])
    ]).T
    it = 0
    err = tol  # guarantees the loop body is entered on the first pass
    clusters = []
    # Strict '<' so that at most itermax iterations run (not itermax + 1).
    while (it < itermax) and (err >= tol):
        clusters = DistSqr(x, c).argmin(axis=1)
        c_new = c.copy()
        for i in range(k):
            members = x[clusters == i, :]
            # A centroid that attracted no points has no defined mean; the
            # mean of zero rows is NaN, which would make argmin send every
            # point to that cluster. Keep the previous position instead.
            if members.shape[0] > 0:
                c_new[i, :] = members.mean(axis=0)
        err = np.absolute(c_new - c).max()
        c = c_new
        it = it + 1
    return clusters, it
In [206]:
# Seed the global NumPy generator so that this sample, and any later draws
# from np.random, are identical on every Restart & Run All.
np.random.seed(0)
# 1000 points, 2 columns, each coordinate uniform on [0, 1).
X = np.random.rand(1000, 2)
In [207]:
k = 4  # number of clusters passed to kmeans
In [208]:
tol = 0.0001  # convergence threshold passed to kmeans (max centroid shift per iteration)
In [209]:
itermax = 1000  # iteration cap passed to kmeans
In [210]:
# Run k-means on the sample; keep the per-point labels and the iteration count.
clusters, it = kmeans(x=X, k=k, tol=tol, itermax=itermax)
In [211]:
# Number of iterations kmeans performed. The call form of print works on
# both Python 2 and Python 3; the bare statement form fails on Python 3.
print(it)
In [212]:
# Scatter the sample, colouring every point by its assigned cluster index.
pl.scatter(X[:, 0], X[:, 1], c=clusters, s=100)
# Title and axis labels so the figure can be read without the surrounding code.
pl.title("k-means cluster assignments")
pl.xlabel("X[:, 0]")
pl.ylabel("X[:, 1]")
pl.show()
In [180]:
# Scratch check of the centroid initialisation: k x-coordinates drawn
# uniformly across the range of the first column of X.
cx = np.random.uniform(
    low=X[:, 0].min(),
    high=X[:, 0].max(),
    size=k,
)
In [92]:
cx  # bare expression: shows the sampled x-coordinates as the cell output
Out[92]:
In [94]:
# k y-coordinates drawn uniformly across the range of the SECOND column of X
# (index 1); index 0 would sample from the x-range instead.
cy = np.random.uniform(low=X.min(axis=0)[1], high=X.max(axis=0)[1], size=k)
In [95]:
cy  # bare expression: shows the sampled y-coordinates as the cell output
Out[95]:
In [ ]: