# k-means algorithm

### How to use Kmeans?

```Kmeans(number_of_points, dimentions)
```

Parameters:

``````- number_of_points - the number of points to be generated randomally.

- dimentions [optional] - the number of dimentions for each point (2=2d, 3=3d)

``````

Attributes:

``````- Kmeans.cluster(number_of_clusters)

number_of_clusters - the number of clusters to look for

``````

### Examples

```# instantiate the class with 200 random points
clusters = Kmeans(200)

# find 5 clusters
clusters.cluster(5)
```
``````

In [29]:

import numpy as np
import matplotlib.pyplot as plt

class Kmeans:

def __init__(self, number_of_points, dimentions=2):
self.number_of_points = number_of_points
self.dimentions = dimentions

self.points = np.zeros((number_of_points, dimentions))
for i in range(number_of_points):
self.points[i] = (np.random.randint(100, size=1)[0],np.random.randint(100, size=1)[0])

def __init_centers(self):
self.centers = np.zeros((self.number_of_clusters, self.dimentions))

for i in range(self.number_of_clusters):
self.centers[i] = (np.random.randint(100, size=1)[0],np.random.randint(100, size=1)[0])

def __init_assign_labels(self):
for p in range(self.number_of_points):
closest_distance = np.sqrt(np.sum((self.points[p]-self.centers[0])**2))
for c in range(self.number_of_clusters):
if closest_distance > np.sqrt(np.sum((self.points[p]-self.centers[c])**2)): # could also be done using np.linalg.norm
self.labels[p] = c
closest_distance = np.sqrt(np.sum((self.points[p]-self.centers[c])**2))

return

def __assign_labels(self, old_labels):

self.labels_prev = np.copy(self.labels)

for p in range(self.number_of_points):
closest_distance = np.sqrt(np.sum((self.points[p]-self.labels[p])**2))
for c in range(self.number_of_clusters):
if closest_distance > np.sqrt(np.sum((self.points[p]-self.centers[c])**2)):
self.labels[p] = c
closest_distance = np.sqrt(np.sum((self.points[p]-self.centers[c])**2))

compare = self.labels_prev == self.labels
is_no_change = True

for c in compare:
if c == False:
is_no_change = False
break

return is_no_change

def __recenter(self):

new_centers = np.zeros(self.centers.shape)
cluster_points_count = np.zeros(self.number_of_clusters)

for c in range(self.number_of_points):
new_centers[int(self.labels[c])] = new_centers[int(self.labels[c])] + self.points[c]
cluster_points_count[int(self.labels[c])]+=1

for c in range (self.number_of_clusters):
self.centers[c] = new_centers[c] / cluster_points_count[c]

return

def cluster(self, number_of_clusters=2):
self.number_of_clusters = number_of_clusters
self.__init_centers()
self.labels = np.zeros(self.number_of_points)
self.labels_prev = np.zeros(self.number_of_points)
self.__init_assign_labels()

end = False

while end == False:
self.__recenter()
end = self.__assign_labels(self.labels)

return self.labels, self.centers

``````
``````

In [74]:

clusters = Kmeans(800)
labels, centers = clusters.cluster(7)

plt.figure(0).set_size_inches(8,8)
plt.scatter(clusters.points[:,0], clusters.points[:,1], c=labels, alpha=0.6, s=40, cmap='Set2')
plt.scatter(centers[:,0], centers[:,1], c='red', s=120, marker='+')
plt.show()

``````
``````

``````
``````

In [ ]:

``````