# K-means clustering and Voronoi tessellation

## Setting up initial data

``````

In [1]:

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

``````
``````

In [2]:

POINTS = 50  # number of data points

import numpy as np
import matplotlib.pyplot as plt

# Seed the global NumPy RNG for reproducibility.
# (np.random.seed returns None, so assigning its result is pointless.)
np.random.seed(42)

# Two rows of POINTS random integer coordinates, each in [0, 100).
x, y = np.random.randint(100, size=(2, POINTS))

def plot_data_points():
    """Scatter-plot the raw (x, y) data points on the current axes."""
    plt.scatter(x, y)

# Plotting
plot_data_points()

``````
``````

``````

## K-means clustering

``````

In [3]:

CLUSTERS = 8  # number of desired clusters

from scipy.cluster.vq import kmeans, vq

# Stack the coordinate arrays into an (N, 2) observation matrix of floats.
# NOTE: the original np.vstack(zip(x, y)) fails on Python 3, where zip
# returns an iterator that NumPy cannot stack; column_stack is explicit,
# and kmeans expects floating-point observations.
data = np.column_stack((x, y)).astype(float)

## K-means clustering
centers = kmeans(data, CLUSTERS)[0]  # (CLUSTERS, 2) centroid coordinates
cluster = vq(data, centers)[0]       # index of the nearest centroid per point

def plot_kmeans_clustering():
    """Mark each k-means centroid with a large '+' on the current axes."""
    plt.plot(centers[:, 0], centers[:, 1], '+', markersize=15)

# Plotting
plot_data_points()
plot_kmeans_clustering()

``````
``````

``````

## Voronoi tessellation

``````

In [4]:

from scipy.spatial import Voronoi

# Voronoi tessellation of the k-means centroids: each region is the set of
# points closer to one centroid than to any other.
vor = Voronoi(centers)

# Draw the finite ridges: edges whose two endpoints are both real Voronoi
# vertices (index >= 0 in vor.ridge_vertices).
for simplex in vor.ridge_vertices:
    simplex = np.asarray(simplex)
    if np.all(simplex >= 0):
        plt.plot(vor.vertices[simplex, 0], vor.vertices[simplex, 1], 'k-')

# Draw the infinite ridges: one endpoint is at infinity (index -1), so we
# extend a dashed segment from the finite vertex along the ridge's normal.
center = centers.mean(axis=0)
for pointidx, simplex in zip(vor.ridge_points, vor.ridge_vertices):
    simplex = np.asarray(simplex)
    if np.any(simplex < 0):
        i = simplex[simplex >= 0][0]  # finite end Voronoi vertex

        # Getting distance between points
        t = centers[pointidx[1]] - centers[pointidx[0]]

        # Normalizing to a unit vector. To do that, just get the norm
        # (by the Pythagorean theorem), and divide the vector by the norm
        t = t / np.linalg.norm(t)

        n = np.array([-t[1], t[0]])  # normal
        midpoint = centers[pointidx].mean(axis=0)
        # Orient the normal away from the centroid cloud's center and
        # extend 50 units past the finite vertex.
        far_point = (vor.vertices[i] +
                     np.sign(np.dot(midpoint - center, n)) * n * 50)
        plt.plot([vor.vertices[i, 0], far_point[0]],
                 [vor.vertices[i, 1], far_point[1]], 'k--')

# Plotting
plot_data_points()
plot_kmeans_clustering()
plt.show()

# Voronoi regions could also be obtained through Delaunay triangulation
# http://stackoverflow.com/questions/10650645/python-calculate-voronoi-tesselation-from-scipys-delaunay-triangulation-in-3d

``````
``````

``````