In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global generator so the synthetic data -- and every later draw
# that relies on np.random in this notebook -- is identical after
# "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)

# Two Gaussian blobs that share one diagonal covariance; only the means differ.
COV = [[5, 0], [0, 10]]
X1 = np.random.multivariate_normal([2, 3], COV, 1000)
X2 = np.random.multivariate_normal([10, 10], COV, 1000)

plt.plot(X1[:,0], X1[:,1], '.')
plt.plot(X2[:,0], X2[:,1], '.')
plt.show()


<Figure size 640x480 with 1 Axes>

In [2]:
# Merge both samples row-wise into one unlabeled dataset and show it.
X = np.concatenate((X1, X2), axis=0)
x_coords, y_coords = X[:, 0], X[:, 1]
plt.plot(x_coords, y_coords, '.')
plt.show()



In [3]:
def Kmeans(data, nclusters, iterations, tol=0.01):
    """Cluster ``data`` with Lloyd's k-means algorithm.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Samples to cluster.
    nclusters : int
        Number of centroids.
    iterations : int
        Maximum number of assignment/update rounds.
    tol : float, optional
        Stop early once the within-cluster sum of squared distances changes by
        less than this amount between two consecutive rounds.

    Returns
    -------
    assign : ndarray of int, shape (n_samples,)
        Index of the centroid each sample belongs to.
    Clusters : ndarray of float, shape (nclusters, n_features)
        Final centroid coordinates.

    Raises
    ------
    ValueError
        If ``data`` contains no samples.
    """
    data = np.asarray(data, dtype=float)
    n_samples = data.shape[0]
    if n_samples == 0:
        raise ValueError("Kmeans needs at least one sample")

    # Seed the centroids with actual samples. Drawing them from the unit square
    # (as np.random.rand does) ignores the scale of the data and routinely
    # leaves centroids that never attract a single point.
    seeds = np.random.choice(n_samples, size=nclusters, replace=n_samples < nclusters)
    Clusters = data[seeds].copy()
    assign = np.zeros(n_samples, dtype=int)
    Jprev = np.inf

    for _ in range(iterations):
        # Assignment step: distance from every sample to every centroid at once.
        distances = np.linalg.norm(data[:, np.newaxis, :] - Clusters[np.newaxis, :, :], axis=2)
        assign = np.argmin(distances, axis=1)

        # Update step. An empty cluster keeps its previous centroid; taking the
        # mean of zero rows would turn it into NaN for good.
        for j in range(nclusters):
            members = assign == j
            if members.any():
                Clusters[j, :] = data[members].mean(axis=0)

        # Objective that k-means minimises: within-cluster sum of squares.
        J = np.sum((data - Clusters[assign]) ** 2)
        if abs(J - Jprev) < tol:
            break
        Jprev = J

    return assign, Clusters

In [4]:
# Split the combined sample into two groups and draw each group in its own colour.
nclusters = 2
labels, Clusters = Kmeans(X, nclusters, iterations=1000)

for cluster_id in range(nclusters):
    members = labels == cluster_id
    plt.plot(X[members, 0], X[members, 1], '.')

plt.show()



In [5]:
# Scratch check: positions of the True entries in a boolean mask.
a = np.asarray((2, 4, 6))
np.flatnonzero(np.array([True, False]))


Out[5]:
array([0], dtype=int64)

In [37]:
class DBSCAN:
    """Density-based clustering (DBSCAN) with Euclidean distance.

    A point is a core ("nuclear") point when at least ``minPts`` *other* points
    lie strictly closer than ``eps`` to it. A cluster is a core point plus
    everything reachable from it through core points; points reachable from no
    core point are noise.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
    eps : float
        Neighbourhood radius (strict ``<`` comparison).
    minPts : int
        Minimum number of neighbours, the point itself excluded, for a core point.

    Attributes
    ----------
    assign : ndarray of int, shape (n_samples,)
        Cluster id per sample after ``run()``; ``-1`` marks noise / unassigned.
    nclusters : int
        Number of clusters found by the last ``run()``.
    """

    def __init__(self, data, eps, minPts):
        self.data = np.asarray(data)
        self.eps = eps
        self.minPts = minPts
        # -1 (not 0) so an unassigned point can never be mistaken for cluster 0.
        self.assign = np.full(self.data.shape[0], -1, dtype=int)
        self.nclusters = 0

    def run(self):
        """Cluster ``self.data`` and store the result in ``assign`` / ``nclusters``."""
        n_points = self.data.shape[0]
        # Start from a clean slate so that calling run() again gives a fresh result.
        self.assign = np.full(n_points, -1, dtype=int)
        self.nclusters = 0
        # One boolean per point replaces the growing index arrays: membership
        # tests are O(1) and a point can never be counted twice, which is what
        # used to break the termination condition.
        visited = np.zeros(n_points, dtype=bool)

        # Seeds are still taken in random order, each point at most once.
        for index in np.random.permutation(n_points):
            if visited[index]:
                continue

            members, seed_is_core = self._expand(index, visited)
            if not seed_is_core:
                # Noise for now (label stays -1); a later cluster may still
                # absorb this point as a border point.
                continue

            members[index] = True
            # A border point that already belongs to an earlier cluster keeps
            # its label instead of being overwritten.
            unassigned = members & (self.assign == -1)
            self.assign[unassigned] = self.nclusters
            self.nclusters = self.nclusters + 1

    def CheckNeighbors(self, index, clusterIndices, alreadyChecked):
        """Expand a cluster starting from ``index``.

        Parameters
        ----------
        index : int
            Point to start from.
        clusterIndices : array-like of int
            Indices already collected for the cluster.
        alreadyChecked : array-like
            Indices of points that must not be expanded again.

        Returns
        -------
        clusterIndices : ndarray of int
            Input indices plus every neighbour of each core point reached.
        alreadyChecked : ndarray of int
            Sorted indices of all points checked so far, ``index`` included.
        """
        visited = np.zeros(self.data.shape[0], dtype=bool)
        visited[np.asarray(alreadyChecked, dtype=int)] = True

        members, _ = self._expand(index, visited)

        clusterIndices = np.asarray(clusterIndices, dtype=int)
        if members.any():
            clusterIndices = np.union1d(clusterIndices, np.flatnonzero(members))
        return clusterIndices, np.flatnonzero(visited)

    def _neighbors(self, index):
        """Return indices of all other points strictly closer than eps to ``index``."""
        distances = np.linalg.norm(self.data - self.data[index], axis=1)
        neighbors = np.flatnonzero(distances < self.eps)
        return neighbors[neighbors != index]

    def _expand(self, index, visited):
        """Collect every point density-reachable from ``index``.

        ``visited`` is a boolean mask that is updated in place. Returns a
        boolean mask of the neighbours gathered from all core points reached,
        and whether ``index`` itself is a core point.
        """
        members = np.zeros(self.data.shape[0], dtype=bool)
        seed_is_core = False
        visited[index] = True

        # Explicit stack instead of recursion: one frame per reachable point
        # overflows Python's recursion limit on clusters of ~1000+ points.
        pending = [index]
        while pending:
            current = pending.pop()
            neighbors = self._neighbors(current)
            if neighbors.size < self.minPts:
                continue  # not a core point: it cannot extend the cluster
            if current == index:
                seed_is_core = True
            members[neighbors] = True
            fresh = neighbors[~visited[neighbors]]
            visited[fresh] = True
            pending.extend(fresh.tolist())

        return members, seed_is_core

In [43]:
# Cluster the combined sample with the hand-written DBSCAN (radius 0.5, 2 neighbours).
dbscan = DBSCAN(data=X, eps=0.5, minPts=2)
dbscan.run()

In [44]:
# One colour per discovered cluster.
for cluster_id in range(dbscan.nclusters):
    members = dbscan.assign == cluster_id
    plt.plot(X[members, 0], X[members, 1], '.')

# Points labelled -1 are the ones DBSCAN marked as outliers/noise. Looping over
# range(nclusters) alone never draws them, so show them explicitly.
noise = dbscan.assign == -1
if noise.any():
    plt.plot(X[noise, 0], X[noise, 1], 'x', color='0.6', label='noise')
    plt.legend()

plt.show()



In [3]:
# an example of image color segmentation.
from pyclustering.utils import draw_image_mask_segments, read_image;
from pyclustering.samples.definitions import IMAGE_SIMPLE_SAMPLES;
from pyclustering.cluster.kmeans import kmeans
from pyclustering.cluster.agglomerative import agglomerative
from pyclustering.cluster.dbscan import dbscan
import numpy as np
from sklearn.preprocessing import scale

from scipy import misc
filename = IMAGE_SIMPLE_SAMPLES.IMAGE_SIMPLE07;
# NOTE: scipy.misc.imread is deprecated since SciPy 1.0 and removed in 1.2 (see
# the warning this cell emits); switch to imageio.imread once it is available.
data = misc.imread(filename)

# Axis 0 of the image indexes rows (vertical, y) and axis 1 indexes columns
# (horizontal, x). Building x from the row count and y from the column count
# only lines up with the flattened pixels when the image happens to be square.
height, width = data.shape[0], data.shape[1]
x = np.arange(0, width)
y = np.arange(0, height)
print(x,y)

# With the default 'xy' indexing both grids have shape (height, width), so
# flattening them walks the pixels in the same row-major order as `data`.
xv, yv = np.meshgrid(x, y)
xv = np.reshape(xv, (height * width, 1))
yv = np.reshape(yv, (height * width, 1))
# -1 keeps every colour channel and also accepts images without a channel axis.
data = np.reshape(data, (height * width, -1))
# One row per pixel: [x, y, channel values...].
data = np.concatenate((xv, yv, data), axis=1)
print(data)


[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31] [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31]
[[  0   0 255 255 255 255]
 [  1   0 255 255 255 255]
 [  2   0 255 255 255 255]
 ...
 [ 29  31 255 255 255 255]
 [ 30  31 255 255 255 255]
 [ 31  31 255 255 255 255]]
C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:12: DeprecationWarning: `imread` is deprecated!
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  if sys.path[0] == '':

In [4]:
nclusters = 4
datanp = np.array(data)
dim = datanp.shape[1]

# Pick `nclusters` distinct rows as the initial centres. The upper bound of
# np.random.randint is exclusive, so passing shape[0] + 1 could yield the index
# shape[0], which is out of range; sampling without replacement also rules out
# two identical start centres.
indcenters = np.random.choice(datanp.shape[0], size=nclusters, replace=False).tolist()

startcenters = datanp[indcenters]
kmeans_instance = kmeans(data, startcenters);

kmeans_instance.process();

In [5]:
# With K-Means: take the clusters from the processed k-means instance and hand
# them, together with the image path, to pyclustering's segment-mask drawing helper.
segments = kmeans_instance.get_clusters();

draw_image_mask_segments(filename, segments);



In [7]:
# With DBSCAN (pyclustering implementation)

import warnings 
# NOTE(review): with no category given this silences every warning for the rest
# of the kernel session, not only the ones raised in this cell.
warnings.filterwarnings('ignore') 

# NOTE(review): this rebinds the name `dbscan`, which an earlier cell used for
# the instance of the hand-written DBSCAN class.
from pyclustering.cluster.dbscan import dbscan

# Neighbourhood radius and neighbour count passed to pyclustering's dbscan.
eps=0.5
neighbors=4
# Plain-list copy of the unscaled feature matrix; not used again in this cell.
data_un = data.tolist()
# Standardise the columns with sklearn's `scale` before clustering, so eps is
# applied to scaled values rather than raw pixel coordinates / colour values.
datanorm = scale(data).tolist()
dbscan_instance = dbscan(datanorm, eps, neighbors);

# IPython line magic: run the clustering and report how long it took.
%time dbscan_instance.process();

segments = dbscan_instance.get_clusters();

draw_image_mask_segments(filename, segments);


Wall time: 37 ms

In [ ]:


In [ ]: