In [39]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
def plotClusters(sampleValues, centroidValues, nSamplesPerCluster):
    """Scatter-plot each cluster in its own colour and mark its centroid.

    sampleValues: array of 2-D points, grouped so that rows
        [i*nSamplesPerCluster, (i+1)*nSamplesPerCluster) belong to cluster i.
    centroidValues: array of shape (nClusters, 2) with centroid coordinates.
    nSamplesPerCluster: number of consecutive rows per cluster.
    """
    palette = plt.cm.rainbow(np.linspace(0, 1, len(centroidValues)))
    for clusterIdx, centroid in enumerate(centroidValues):
        start = clusterIdx * nSamplesPerCluster
        clusterSamples = sampleValues[start:start + nSamplesPerCluster]
        plt.scatter(clusterSamples[:, 0], clusterSamples[:, 1], c=palette[clusterIdx])
        # Mark the centroid with a large black dot on top of its cluster.
        plt.plot(centroid[0], centroid[1], markersize=15, marker='.', color='k', mew=4)
    plt.show()
def createSamples(numClusters, numSamplesPerCluster, numFeatures, embiggenFactor, seed):
    """Build synthetic clustered data as TensorFlow tensors.

    For each cluster, draws Gaussian noise (stddev 5.0) around a random
    centroid chosen uniformly in [-embiggenFactor/2, embiggenFactor/2).

    Returns:
        (centroids, samples): centroids has shape (numClusters, numFeatures);
        samples has shape (numClusters * numSamplesPerCluster, numFeatures),
        with each cluster's rows stored contiguously.
    """
    np.random.seed(seed)
    slices = []
    centroids = []
    for i in range(numClusters):
        # BUGFIX: use a distinct op-level seed per cluster. With the same op
        # seed on every tf.random_normal op (and no graph-level seed), TF1
        # gives each op an identical random sequence, so all clusters would be
        # identical translated copies of the same noise blob.
        samples = tf.random_normal(
            (numSamplesPerCluster, numFeatures),
            mean=0.0, stddev=5.0, dtype=tf.float32, seed=seed + i,
            name="cluster_{}".format(i)
        )
        currentCentroid = (np.random.random((1, numFeatures)) * embiggenFactor) - (embiggenFactor / 2)
        centroids.append(currentCentroid)
        # Shift the zero-mean noise onto its cluster centre.
        samples += currentCentroid
        slices.append(samples)
    samples = tf.concat(slices, 0, name="samples")
    centroids = tf.concat(centroids, 0, name="centroids")
    return centroids, samples
# Experiment configuration.
nFeatures = 2
nClusters = 3
nSamplesPerCluster = 500
seed = 100
embiggenFactor = 70

np.random.seed(seed)
centroids, samples = createSamples(nClusters, nSamplesPerCluster, nFeatures, embiggenFactor, seed)
# No tf.Variables are created above; the initializer op is kept for parity.
model = tf.global_variables_initializer()
with tf.Session() as session:
    samplesValues = session.run(samples)
    centroidValues = session.run(centroids)
plotClusters(samplesValues, centroidValues, nSamplesPerCluster)
In [40]:
# Initialisation
def chooseRandomCentroid(samples, nClusters):
    """Pick nClusters rows of `samples`, uniformly at random, as initial centroids."""
    totalSamples = tf.shape(samples)[0]
    shuffledIndices = tf.random_shuffle(tf.range(0, totalSamples))
    # The first nClusters entries of a shuffled index vector are a uniform
    # random sample of row indices without replacement.
    chosenIndices = tf.slice(shuffledIndices, [0], [nClusters])
    return tf.gather(samples, chosenIndices)
def assignNearest(samples, centroids):
    """Return, for each sample, the index of its nearest centroid.

    Broadcasting trick: samples become (1, nSamples, nFeatures) and centroids
    (nClusters, 1, nFeatures); squared differences are summed over the feature
    axis, giving a (nClusters, nSamples) distance matrix, then argmin over the
    cluster axis yields one centroid index per sample.
    """
    samplesExpanded = tf.expand_dims(samples, 0)
    centroidsExpanded = tf.expand_dims(centroids, 1)
    squaredDiffs = tf.square(tf.subtract(samplesExpanded, centroidsExpanded))
    distances = tf.reduce_sum(squaredDiffs, 2)
    return tf.argmin(distances, 0)
def updateCentroids(samples, nearestIndices, nClusters):
    """Recompute each centroid as the mean of the samples assigned to it."""
    assignments = tf.to_int32(nearestIndices)
    # One partition per cluster; average each and stack back to (nClusters, nFeatures).
    partitions = tf.dynamic_partition(samples, assignments, nClusters)
    clusterMeans = [tf.expand_dims(tf.reduce_mean(part, 0), 0) for part in partitions]
    return tf.concat(clusterMeans, 0)
# One k-means iteration: random initialisation -> nearest assignment -> update.
initialCentroids = chooseRandomCentroid(samples, nClusters)
nearestIndices = assignNearest(samples, initialCentroids)
latestCentroid = updateCentroids(samples, nearestIndices, nClusters)
model = tf.global_variables_initializer()
with tf.Session() as session:
    # BUGFIX: evaluate samples and the updated centroids in a SINGLE run.
    # Seeded stateful random ops (random_normal, random_shuffle) advance their
    # sequence on every session.run, so two separate run calls would plot one
    # random draw of the samples while the centroids were computed from a
    # different draw.
    sampleValues, updatedCentroidValues = session.run([samples, latestCentroid])
plotClusters(sampleValues, updatedCentroidValues, nSamplesPerCluster)
In [ ]: