In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import *

In [7]:
class KMeans:
    """k-means clustering of 2-D points, with one matplotlib figure per step.

    Typical use: call ``dataset`` to generate points, ``cluster_center`` to
    set the six initial centres, then call ``figure_plot`` repeatedly; every
    call draws the current state and performs one assignment/update step.
    """

    def __init__(self):

        # Colour of each cluster; the list index is the cluster id
        self.color = ["r", "b", "c", "m", "y", "g"]

        # Binary indicator values (not read anywhere in this class)
        self.r = [1, 0]

        # Number of clusters
        self.k = 6

        # Number of points drawn from each Gaussian by dataset()
        self.node = 300

    def dataset(self, ave1, ave2, cov1, cov2, node):
        """Generate two Gaussian blobs and store them in ``self.X`` / ``self.Y``.

        Args:
            ave1: Mean of the first blob, used for both coordinates.
            ave2: Mean of the second blob, used for both coordinates.
            cov1: Diagonal entry of the shared 2x2 covariance matrix.
            cov2: Off-diagonal entry of the shared 2x2 covariance matrix.
            node: Number of points drawn for each blob.
        """
        # Means
        mu1 = [ave1, ave1]
        mu2 = [ave2, ave2]

        # Covariance shared by both blobs
        cov = [[cov1, cov2], [cov2, cov1]]

        # Draw the samples; first blob first, so X/Y hold blob 1 then blob 2
        x1, y1 = np.random.multivariate_normal(mu1, cov, node).T
        x2, y2 = np.random.multivariate_normal(mu2, cov, node).T
        self.X = np.append(x1, x2)
        self.Y = np.append(y1, y2)

    def cluster_center(self, x1, y1, x2, y2, x3, y3, x4, y4, x5, y5, x6, y6):
        """Set the initial centres from six (x, y) pairs.

        The centres are stored in ``self.u`` as ``[[x1..x6], [y1..y6]]``.
        """
        self.u = [[x1, x2, x3, x4, x5, x6], [y1, y2, y3, y4, y5, y6]]

    def param_init(self):
        """Reset the per-iteration working state used by ``distance``."""
        self.count = []        # number of points assigned to each cluster
        self.sum = []          # coordinate sum of the points in each cluster
        self.t = []            # updated centre of each cluster
        self.clus = []         # colour assigned to each point
        self.b = []            # centres used for the assignment step
        self.distortion = 0.0  # sum of squared point-to-centre distances

    def figure_plot(self, X, Y, u, k, clus):
        """Plot the points and centres, then run one k-means step.

        Args:
            X, Y: Point coordinates.
            u: Centres as ``[xs, ys]``.
            k: Number of clusters to draw and update.
            clus: Colour(s) for the points; a single colour or one per point.
        """
        # Start a new figure
        plt.figure()
        # Plot the same points that are clustered below (the arguments, not
        # whatever happens to be stored on the instance).
        plt.scatter(X, Y, c=clus, s=20, marker="o", edgecolors='k')
        # Take the centre colours from self.color so they match the colours
        # assigned to the points and the list length follows k. No edgecolors
        # here: "x" is an unfilled marker and is drawn in the face colour.
        plt.scatter(u[0][0:k], u[1][0:k], c=self.color[0:k], s=100, marker="x")
        self.distance(X, Y, u, k)

    def distance(self, X, Y, u, k):
        """Run one k-means step: assign points, then update the centres.

        Each point is assigned to the nearest of the first ``k`` centres in
        ``u``. The result is stored on the instance:

        * ``self.clus``  - colour of the cluster chosen for each point
        * ``self.count`` - number of points per cluster
        * ``self.u``     - centres, overwritten with the new cluster means
        * ``self.distortion`` - sum of squared distances between each point
          and the centre it was assigned to (measured before the update)

        A cluster that received no points keeps its previous centre instead
        of becoming NaN through a division by zero.
        """
        # Reset working state
        self.param_init()

        # Prepare per-cluster accumulators; self.b snapshots the centres so
        # the assignment is not affected by the update at the end.
        self.b = [[u[0][c], u[1][c]] for c in range(k)]
        self.count = [0] * k
        self.sum = np.zeros((k, 2))
        self.t = [0] * k

        for px, py in zip(X, Y):
            point = np.array([px, py])

            # Euclidean distance from this point to every centre
            dists = [np.linalg.norm(self.b[c] - point) for c in range(k)]

            # Assign to the nearest centre (first one wins on ties)
            nearest = dists.index(min(dists))
            self.clus.append(self.color[nearest])
            self.count[nearest] += 1
            self.sum[nearest] += point

            # Distortion measure: accumulate the squared distance
            self.distortion += float(dists[nearest]) ** 2

        # Update the centres
        for c in range(k):
            if self.count[c] > 0:
                self.t[c] = self.sum[c] / self.count[c]
            else:
                # Empty cluster: keep the old centre
                self.t[c] = np.array(self.b[c], dtype=float)

        # Allow distance() to be used without a prior cluster_center() call
        if not hasattr(self, "u"):
            self.u = [list(u[0]), list(u[1])]
        for axis in range(2):
            for c in range(k):
                self.u[axis][c] = self.t[c][axis]

In [8]:
if __name__ == '__main__':
    # Two Gaussian blobs with means (-2, -2) and (2, 2) and covariance
    # [[3, 1], [1, 3]], km.node points each.
    km = KMeans()
    km.dataset(-2, 2, 3, 1, km.node)

    # Initial centres written as (x, y) pairs and flattened into the
    # positional form cluster_center expects: x1, y1, x2, y2, ...
    start_centres = [(-5, 5), (-5, -4), (0, -4), (1, 3), (4, -5), (6, 6)]
    km.cluster_center(*[coord for pair in start_centres for coord in pair])

    # First frame: no assignment exists yet, so every point gets one colour.
    # figure_plot also runs the first assignment/update step.
    unassigned_colour = km.color[5]
    km.figure_plot(km.X, km.Y, km.u, km.k, unassigned_colour)

    # Ten more frames, each coloured by the assignment of the previous step.
    for _frame in range(10):
        km.figure_plot(km.X, km.Y, km.u, km.k, km.clus)