In [1]:
import numpy as np
import matplotlib.pyplot as plt
# Draw two 2-D Gaussian blobs that share one diagonal covariance matrix.
# A seeded generator is used so that the sample -- and therefore every
# clustering result computed from it further down -- is identical after a
# kernel restart.
SEED = 42
rng = np.random.default_rng(SEED)
BLOB_COV = [[5, 0], [0, 10]]
X1 = rng.multivariate_normal([2, 3], BLOB_COV, 1000)
X2 = rng.multivariate_normal([10, 10], BLOB_COV, 1000)

# One colour per blob, so the ground-truth grouping is visible.
plt.plot(X1[:, 0], X1[:, 1], '.', label='X1')
plt.plot(X2[:, 0], X2[:, 1], '.', label='X2')
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.legend()
plt.show()
In [2]:
# Merge both blobs into a single unlabeled sample (rows of X1 first, then X2)
# and show it in one colour, i.e. the way the clustering algorithms see it.
X = np.concatenate((X1, X2), axis=0)
first_feature = X[:, 0]
second_feature = X[:, 1]
plt.plot(first_feature, second_feature, '.')
plt.show()
In [3]:
def Kmeans(data, nclusters, iterations, tol=0.01, rng=None):
    """Cluster the rows of ``data`` with Lloyd's k-means algorithm.

    Centroids are initialised from ``nclusters`` distinct rows of ``data``
    (rather than from uniform noise in [0, 1), which may lie far from the
    data and leave clusters empty).  A cluster that loses all of its points
    keeps its previous centroid instead of becoming NaN.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Points to cluster.
    nclusters : int
        Number of clusters; must satisfy ``1 <= nclusters <= n_samples``.
    iterations : int
        Maximum number of assignment/update rounds.
    tol : float, optional
        Stop early once the objective (sum of squared distances of every
        point to its centroid) changes by less than ``tol`` between rounds.
    rng : None, int or numpy.random.Generator, optional
        Seed or generator used to pick the initial centroids.

    Returns
    -------
    assign : numpy.ndarray of int, shape (n_samples,)
        Index of the cluster each point belongs to.
    Clusters : numpy.ndarray of float, shape (nclusters, n_features)
        Final centroid coordinates.

    Raises
    ------
    ValueError
        If ``data`` is not a non-empty 2-D array or ``nclusters`` is out of
        range.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0:
        raise ValueError("data must be a non-empty 2-D array")
    n_samples = data.shape[0]
    if not 1 <= nclusters <= n_samples:
        raise ValueError("nclusters must be between 1 and the number of samples")

    rng = np.random.default_rng(rng)
    # Fancy indexing returns a copy, so updating centroids never touches data.
    Clusters = data[rng.choice(n_samples, size=nclusters, replace=False)]
    assign = np.zeros(n_samples, dtype=int)
    # Infinity (not 0) so a tiny first objective cannot trigger an early stop.
    Jprev = np.inf

    for _ in range(iterations):
        # Assignment step: squared distance of every point to every centroid,
        # shape (n_samples, nclusters).  The argmin is the same as for the
        # plain Euclidean norm.
        sq_dist = ((data[:, None, :] - Clusters[None, :, :]) ** 2).sum(axis=2)
        assign = np.argmin(sq_dist, axis=1)

        # Update step: move each centroid to the mean of its members.
        for j in range(nclusters):
            members = data[assign == j]
            if len(members):  # empty cluster: keep the old centroid
                Clusters[j] = members.mean(axis=0)

        # Objective after the update, used only for the convergence test.
        J = ((data - Clusters[assign]) ** 2).sum()
        if abs(J - Jprev) < tol:
            break
        Jprev = J

    return assign, Clusters
In [4]:
# Run the hand-written k-means on the stacked sample and draw every
# recovered cluster as its own point series.
nclusters = 2
labels, Clusters = Kmeans(X, nclusters, 1000)
for cluster_id in range(nclusters):
    cluster_points = X[labels == cluster_id]
    plt.plot(cluster_points[:, 0], cluster_points[:, 1], '.')
plt.show()
In [5]:
# Scratch check: positions of the True entries of a boolean mask.
a = np.arange(2, 7, 2)
np.flatnonzero(np.array([True, False]))
Out[5]:
In [37]:
class DBSCAN:
    """Density-based clustering (DBSCAN) over the rows of a 2-D array.

    A point is a core ("nuclear") point when at least ``minPts`` *other*
    points lie strictly closer than ``eps`` to it.  Clusters are grown from
    core points; points that end up in no cluster are labelled ``-1``.

    Attributes
    ----------
    data : numpy.ndarray, shape (n_points, n_features)
    eps : float
        Neighbourhood radius (strict ``<`` comparison).
    minPts : int
        Minimum number of neighbours (the point itself excluded) for a core
        point; also the minimum size of a reported cluster.
    assign : numpy.ndarray of float, shape (n_points,)
        Cluster label per point after ``run()``; ``-1`` marks noise.
    nclusters : int
        Number of clusters found by the last ``run()``.
    """

    def __init__(self, data, eps, minPts):
        self.data = data
        self.eps = eps
        self.minPts = minPts
        self.assign = np.zeros(shape=(data.shape[0],))
        self.nclusters = 0

    def run(self):
        """Label every point; results are stored in ``assign``/``nclusters``.

        Seeds are taken in index order, so repeated runs give identical
        labels.  State from a previous call is discarded first.
        """
        n_points = self.data.shape[0]
        # Everything starts as noise; clusters overwrite the label below.
        self.assign = np.full(n_points, -1.0)
        self.nclusters = 0
        # One flag per point replaces the growing index arrays: a point is
        # handled exactly once, so the loop always terminates.
        visited = np.zeros(n_points, dtype=bool)

        for seed in range(n_points):
            if visited[seed]:
                continue
            members = self._expand(seed, visited)
            members[seed] = True
            if np.count_nonzero(members) >= self.minPts:
                # Border points previously marked as noise are re-labelled
                # here because they are part of ``members``.
                self.assign[members] = self.nclusters
                self.nclusters = self.nclusters + 1
            # Otherwise the seed stays an outlier (label -1).

    def CheckNeighbors(self, index, clusterIndices, alreadyChecked):
        """Grow a cluster from ``index`` and report what was reached.

        Parameters
        ----------
        index : int
            Point to start the expansion from.
        clusterIndices : array-like of int
            Indices already collected for the current cluster.
        alreadyChecked : array-like of int
            Indices that must not be expanded again.

        Returns
        -------
        clusterIndices : numpy.ndarray of int
            Sorted union of the input indices and every neighbour of a core
            point reached from ``index``.
        alreadyChecked : numpy.ndarray of int
            Sorted indices of all points checked so far, ``index`` included.
        """
        visited = np.zeros(self.data.shape[0], dtype=bool)
        visited[np.asarray(alreadyChecked, dtype=int)] = True
        members = self._expand(int(index), visited)
        clusterIndices = np.union1d(
            np.asarray(clusterIndices, dtype=int), np.flatnonzero(members)
        )
        return clusterIndices, np.flatnonzero(visited)

    def _neighbors(self, index):
        """Return indices of the points strictly within ``eps`` of ``index``, itself excluded."""
        distances = np.linalg.norm(self.data[index] - self.data, axis=1)
        close = np.flatnonzero(distances < self.eps)
        return close[close != index]

    def _expand(self, seed, visited):
        """Return a mask of all neighbours of core points reachable from ``seed``.

        ``visited`` is updated in place.  An explicit stack is used instead
        of recursion so large clusters cannot exhaust the call stack.
        """
        members = np.zeros(self.data.shape[0], dtype=bool)
        visited[seed] = True
        frontier = [seed]
        while frontier:
            current = frontier.pop()
            neighbors = self._neighbors(current)
            if len(neighbors) < self.minPts:
                continue  # not a core point: do not expand through it
            members[neighbors] = True
            fresh = neighbors[~visited[neighbors]]
            # Mark on push so no point is queued twice.
            visited[fresh] = True
            frontier.extend(fresh.tolist())
        return members
In [43]:
# Cluster the stacked sample with the hand-written DBSCAN class.
# NOTE: the name `dbscan` is bound again further down by
# `from pyclustering.cluster.dbscan import dbscan`, which replaces this
# instance once that cell has run.
dbscan = DBSCAN(data=X, eps=0.5, minPts=2)
dbscan.run()
In [44]:
# Draw each DBSCAN cluster as its own point series.  Only labels
# 0 .. nclusters-1 are iterated, so points labelled -1 are not drawn.
for cluster_id in range(dbscan.nclusters):
    cluster_points = X[dbscan.assign == cluster_id]
    plt.plot(cluster_points[:, 0], cluster_points[:, 1], '.')
plt.show()
In [3]:
# an example of image color segmentation.
from pyclustering.utils import draw_image_mask_segments, read_image;
from pyclustering.samples.definitions import IMAGE_SIMPLE_SAMPLES;
from pyclustering.cluster.kmeans import kmeans
from pyclustering.cluster.agglomerative import agglomerative
from pyclustering.cluster.dbscan import dbscan
import numpy as np
from sklearn.preprocessing import scale
from scipy import misc
# Build one feature row per pixel: (row index, column index, channel values).
filename = IMAGE_SIMPLE_SAMPLES.IMAGE_SIMPLE07
# NOTE(review): `scipy.misc.imread` is not shipped by recent SciPy releases --
# confirm the pinned SciPy version still provides it.
image = misc.imread(filename)
n_rows, n_cols = image.shape[:2]
x = np.arange(0, n_rows)
y = np.arange(0, n_cols)
print(x, y)
# indexing='ij' yields (n_rows, n_cols) grids, so flattening them row-major
# visits pixels in the same order as the reshape of the image below.  The
# default 'xy' indexing yields (n_cols, n_rows) grids, which pairs
# coordinates with the wrong pixels whenever the image is not square.
xv, yv = np.meshgrid(x, y, indexing='ij')
xv = np.reshape(xv, (n_rows * n_cols, 1))
yv = np.reshape(yv, (n_rows * n_cols, 1))
# -1 keeps every channel and also accepts a single-channel (2-D) image.
pixels = np.reshape(image, (n_rows * n_cols, -1))
data = np.concatenate((xv, yv, pixels), axis=1)
print(data)
In [4]:
# Seed pyclustering's k-means with `nclusters` rows of the feature matrix.
nclusters = 4
datanp = np.array(data)
dim = datanp.shape[1]
# Sample valid, distinct row indices.  `randint(n_rows + 1)` could return
# n_rows itself (one past the last row, an IndexError below) and could pick
# the same row twice, producing duplicate start centres.
indcenters = np.random.choice(datanp.shape[0], size=nclusters, replace=False).tolist()
startcenters = datanp[indcenters]
kmeans_instance = kmeans(data, startcenters)
kmeans_instance.process();
In [5]:
# With K-Means: fetch the clusters found by pyclustering and overlay them
# on the source image as mask segments.
segments = kmeans_instance.get_clusters()
draw_image_mask_segments(filename, segments);
In [7]:
# Con DBSCAN
import warnings
warnings.filterwarnings('ignore')
from pyclustering.cluster.dbscan import dbscan
eps=0.5
neighbors=4
data_un = data.tolist()
datanorm = scale(data).tolist()
dbscan_instance = dbscan(datanorm, eps, neighbors);
%time dbscan_instance.process();
segments = dbscan_instance.get_clusters();
draw_image_mask_segments(filename, segments);
In [ ]:
In [ ]: