notebook.community

Edit and run



In [1]:

    
import numpy as np
import scipy
import pandas
import treelib
import pyclust
import pandas

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
pyclust.__version__









    Out[2]:





'0.1.3'



In [13]:

    
#### co-centric circles
d = np.random.uniform(low=-6, high=6, size=(4000,2))

d1a = d[(d[:,0]**2 + d[:,1]**2 <36) & (d[:,0]**2 + d[:,1]**2 >25), :]

d = np.random.uniform(low=-4, high=4, size=(4000,2))

d1b = d[(d[:,0]**2 + d[:,1]**2 <9) & (d[:,0]**2 + d[:,1]**2 > 4),:]
#d1c = d[(d[:,0]**2 + d[:,1]**2 <=1), :]

X = np.vstack((d1a,d1b))

print("Cluster sizes %s "%[d1a.shape[0], d1b.shape[0]]) #, d1c.shape[0]])









    



Cluster sizes [958, 924]



In [7]:

    
def plot_scatter(X, labels1=None, labels2=None, titles=["Scatter Plot", None]):
    
    labels1 = np.zeros(shape=X.shape[0], dtype=int) if labels1 is None else labels1
    
    colors = ['b', 'r', 'g', 'm', 'y']
    col_dict = {}
    
    for i,lab in enumerate(np.unique(labels1)):
        col_dict[lab] = colors[i]
        
    
    if labels2 is None:
        fig = plt.figure(1, figsize=(8,6))
        ax1 = fig.add_subplot(1, 1, 1)
    else:
        fig = plt.figure(1, figsize=(10,6))
        ax1 = fig.add_subplot(1, 2, 1)

    for i in np.unique(labels1):
        indx = np.where(labels1 == i)[0]
        plt.scatter(X[indx,0], X[indx,1], color=col_dict[i], marker='o', s=100, alpha=0.5)

    plt.setp(ax1.get_xticklabels(), rotation='horizontal', fontsize=16)
    plt.setp(ax1.get_yticklabels(), rotation='vertical', fontsize=16)

    plt.xlabel('$x_1$', size=20)
    plt.ylabel('$x_2$', size=20)
    plt.title(titles[0], size=20)

    
    if labels2 is not None:
        ax2 = fig.add_subplot(1, 2, 2)
        for i in np.unique(labels2):
            indx = np.where(labels2 == i)[0]
            plt.scatter(X[indx,0], X[indx,1], color=col_dict[i], marker='o', s=100, alpha=0.5)

        plt.setp(ax1.get_xticklabels(), rotation='horizontal', fontsize=16)
        plt.setp(ax1.get_yticklabels(), rotation='vertical', fontsize=16)

        plt.xlabel('$x_1$', size=20)
        plt.ylabel('$x_2$', size=20)
        plt.title(titles[1], size=20)
        
    plt.show()
    
## test plot original data
plot_scatter(X, labels1=None, titles=["Scatter Plot: Original Labels"])



In [23]:

    
## Performing KMeans  
km = pyclust.KMeans(n_clusters=3, n_trials=50)

km.fit(X)

### Performing Kernel K-means

kkm = pyclust.KernelKMeans(n_clusters=3, kernel='rbf')
kkm.fit(X)
print("Converged after %d iterations"%kkm.n_iter_)
plot_scatter(X, labels1=km.labels_, labels2=kkm.labels_, titles=["KMeans", "Kernel KMeans"])









    



Converged after 13 iterations



In [24]:

    
s1 = np.array([[0.02, 0.05], [0.5, 2.0]])
s2 = np.array([[0.6, 0.0], [0.0, 1.1]])
#s3 = np.array([[0.4, -0.5], [-0.04, 0.3]])
s3 = np.array([[0.4, -0.5], [-0.02, 0.2]])

m1 = np.array([-2.0, 1.0])
m2 = np.array([0.0, -3.0])
m3 = np.array([1.0, 2.0])

X1 = np.random.multivariate_normal(mean=m1, cov=s1, size=200)
X2 = np.random.multivariate_normal(mean=m2, cov=s2, size=300)
X3 = np.random.multivariate_normal(mean=m3, cov=s3, size=100)

X = np.vstack((X1, X2, X3))

indx_arr = np.arange(X.shape[0])
np.random.shuffle(indx_arr)

y = np.hstack((np.zeros(200, dtype=int), np.ones(300, dtype=int), 2*np.ones(100, dtype=int)))
X = X[indx_arr,:]
y = y[indx_arr]



In [25]:

    
kkm = pyclust.KernelKMeans(n_clusters=3, kernel='rbf')
kkm.fit(X)

plot_scatter(X, labels1=kkm.labels_, titles=["Scatter Plot: Original Labels"])



In [4]:

    
import sys
sys.path.append('/home/vahid/')

import ketnel_kmeans



In [16]:

    
km2 = ketnel_kmeans.KernelKMeans(n_clusters=2, kernel='rbf')

res = km2.fit_predict(X)



In [17]:

    
plot_scatter(X, labels1=res, titles=["Scatter Plot: Original Labels"])



In [ ]:



In [ ]: