In [ ]:
import random
import collections
class KMeans():
    def __init__(self):
        self.k = 3
        
    def set(self, k, max_iter):
        self.k = k
        self.max_iter = max_iter

    def euc_dist(self, a, b):
        return sum([(ai - bi) ** 2 for ai, bi in zip(a, b)])

    def fit(self, points):
        n = len(points)
        dim = len(points[0])
        centers = [points[i] for i in random.sample(range(0, n), self.k)]
        custer = collections.defaultdict(list)


        for it in range(self.max_iter):
            # assign point to center
            for i in range(n):
                min_idx = 0
                min_dist = self.euc_dist(points[i], centers[0])
                for j in range(1, self.k):
                    cur_dist = self.euc_dist(points[i], centers[j])
                    if cur_dist < min_dist:
                        min_idx = j
                        min_dist = cur_dist
                custer[min_idx].append(points[i])

            # update center
            for i in range(self.k):
                sumv = [0] * dim
                cn = len(custer[i])
                for p in custer[i]:
                    for j in range(dim):
                        sumv[j] += p[j]
                centers[i] = list(map(lambda x: x / cn, sumv))
        return centers

In [54]:
points = [(1, 1), (2, 2), (1, 2), (2, 1),
          (-3, -3), (-4, -4), (-3, -4), (-4, -3)
          ]
km = KMeans()
km.set(2, 10)
centers = km.fit(points)
print centers


[[1, 1], [-3, -4], [-4, -4]]

In [ ]: