In [1]:
%matplotlib inline
In [2]:
POINTS = 50 # number of data points
import numpy as np
import matplotlib.pyplot as plt
seed = np.random.seed(42)
x, y = np.random.randint(100, size=(2, POINTS))
def plot_data_points():
plt.scatter(x,y)
# Plotting
plot_data_points()
In [3]:
CLUSTERS = 8 # number of desired clusters
from scipy.cluster.vq import kmeans, vq
data = np.vstack(zip(x, y))
## K-means clustering
centers = kmeans(data, CLUSTERS)[0]
cluster = vq(data, centers)[0]
def plot_kmeans_clustering():
plt.plot(centers[:, 0], centers[:, 1], '+', markersize=15)
# Plotting
plot_data_points()
plot_kmeans_clustering()
In [4]:
from scipy.spatial import Voronoi
vor = Voronoi(centers)
for simplex in vor.ridge_vertices:
simplex = np.asarray(simplex)
if np.all(simplex >= 0):
plt.plot(vor.vertices[simplex, 0], vor.vertices[simplex, 1], 'k-')
center = centers.mean(axis=0)
for pointidx, simplex in zip(vor.ridge_points, vor.ridge_vertices):
simplex = np.asarray(simplex)
if np.any(simplex < 0):
i = simplex[simplex >= 0][0] # finite end Voronoi vertex
# Getting distance between points
t = centers[pointidx[1]] - centers[pointidx[0]]
# Normalizing to a unit vector. To do that, just get the norm
# (by the Pythagorean theorem), and divide the vector by the norm
t = t/np.linalg.norm(t)
n = np.array([-t[1], t[0]]) # normal
midpoint = centers[pointidx].mean(axis=0)
far_point = (vor.vertices[i] +
np.sign(np.dot(midpoint - center, n)) * n * 50)
plt.plot([vor.vertices[i,0], far_point[0]],
[vor.vertices[i,1], far_point[1]], 'k--')
# Plotting
plot_data_points()
plot_kmeans_clustering()
plt.show()
# Voronoi regions could also be obtained through Delaunay triangulation
# http://stackoverflow.com/questions/10650645/python-calculate-voronoi-tesselation-from-scipys-delaunay-triangulation-in-3d