K-means clustering and Voronoi tesselation

Setting up initial data


In [1]:
%matplotlib inline

In [2]:
POINTS = 50 # number of data points

import numpy as np
import matplotlib.pyplot as plt

seed = np.random.seed(42)
x, y = np.random.randint(100, size=(2, POINTS))

def plot_data_points():
    plt.scatter(x,y)

# Plotting
plot_data_points()


K-means clustering


In [3]:
CLUSTERS = 8 # number of desired clusters

from scipy.cluster.vq import kmeans, vq

data = np.vstack(zip(x, y))

## K-means clustering
centers = kmeans(data, CLUSTERS)[0]
cluster = vq(data, centers)[0]

def plot_kmeans_clustering():
    plt.plot(centers[:, 0], centers[:, 1], '+', markersize=15)

# Plotting
plot_data_points()
plot_kmeans_clustering()


Voronoi tesselation


In [4]:
from scipy.spatial import Voronoi

vor = Voronoi(centers)

for simplex in vor.ridge_vertices:
    simplex = np.asarray(simplex)
    if np.all(simplex >= 0):
        plt.plot(vor.vertices[simplex, 0], vor.vertices[simplex, 1], 'k-')

center = centers.mean(axis=0)
for pointidx, simplex in zip(vor.ridge_points, vor.ridge_vertices):
    simplex = np.asarray(simplex)
    if np.any(simplex < 0):
        i = simplex[simplex >= 0][0] # finite end Voronoi vertex
        
        # Getting distance between points
        t = centers[pointidx[1]] - centers[pointidx[0]] 
        
        # Normalizing to a unit vector. To do that, just get the norm
        # (by the Pythagorean theorem), and divide the vector by the norm
        t = t/np.linalg.norm(t)
        
        n = np.array([-t[1], t[0]]) # normal
        midpoint = centers[pointidx].mean(axis=0)
        far_point = (vor.vertices[i] +
                     np.sign(np.dot(midpoint - center, n)) * n * 50)
        plt.plot([vor.vertices[i,0], far_point[0]],
             [vor.vertices[i,1], far_point[1]], 'k--')
        
# Plotting
plot_data_points()
plot_kmeans_clustering()
plt.show()

# Voronoi regions could also be obtained through Delaunay triangulation
# http://stackoverflow.com/questions/10650645/python-calculate-voronoi-tesselation-from-scipys-delaunay-triangulation-in-3d