Kristaps Taube, kt11023

LD4


In [525]:
%matplotlib inline

In [527]:
import csv
with open('ld4/examples.txt') as f:
    data = [i for i in zip(*csv.reader(f, delimiter='\t'))]
data = [[int(k) for k in i] for i in data]

In [528]:
import matplotlib.pyplot as plt
plt.plot(data[0], data[1], 'ro')
plt.show()



In [529]:
# back to (x, y)
data = [i for i in zip(*data)]
data = np.array(data)

k-means


In [530]:
import numpy as np


def cluster_points(X, mu):
    clusters = {}
    for x in X:
        best = min([(i[0], np.linalg.norm(x - mu[i[0]]))
                         for i in enumerate(mu)], key=lambda t: t[1])[0]
        try:
            clusters[best].append(x)
        except KeyError:
            clusters[best] = [x]
    return clusters

# apkopo klusteru centrus
def centrs(clusters):
    newW = []
    keys = sorted(clusters.keys())
    for k in keys:
        newW.append(np.mean(clusters[k], axis=0))
    return newW


def converge(mu, oldmu):
    return (set([tuple(a) for a in mu]) == set([tuple(a) for a in oldmu]))


def kmeans(E, ccount, maxepochs=500):
    epochs = 0
    W = random.sample(list(E), ccount)
    W2 = random.sample(list(E), ccount)
    while not converge(W2, W) or epochs > maxepochs:
        epochs += 1
        W = W2
        clusters = cluster_points(E, W2)
        W2 = centrs(clusters)
    return (W2, clusters)

In [531]:
centers, clusters = kmeans(data, 4)
centroids = [i for i in zip(*centers)]
first = [i for i in zip(*clusters[0])]
second = [i for i in zip(*clusters[1])]
third = [i for i in zip(*clusters[2])]
forth = [i for i in zip(*clusters[3])]

In [532]:
# dzeltenie ir centri
plt.plot(centroids[0], centroids[1], 'ro', color='yellow')
plt.plot(first[0], first[1], 'ro', color='red')
plt.plot(second[0], second[1], 'ro', color='green')
plt.plot(third[0], third[1], 'ro', color='blue')
plt.plot(forth[0], forth[1], 'ro', color='black')
plt.show()


kohonen


In [533]:
# normalize data
data_perm = data.T
print(data_perm)
data = np.array([
    np.array(data_perm)[0] / np.max(np.abs(np.array(data)[0]),axis=0),
    np.array(data_perm)[1] / np.max(np.abs(np.array(data)[1]),axis=0)])
data = data.T
data


[[ 1  3  3  3  3  5  6  9  9  6 11 11  2 11  8 10  9  5  9  1  7 12  6 10
  11  4]
 [ 3  7  2  9  3  8  8  3  9 11 10  1  1  3 10  9  2  1 10  6  3  5  6  4
   7  6]]
Out[533]:
array([[ 0.33333333,  0.42857143],
       [ 1.        ,  1.        ],
       [ 1.        ,  0.28571429],
       [ 1.        ,  1.28571429],
       [ 1.        ,  0.42857143],
       [ 1.66666667,  1.14285714],
       [ 2.        ,  1.14285714],
       [ 3.        ,  0.42857143],
       [ 3.        ,  1.28571429],
       [ 2.        ,  1.57142857],
       [ 3.66666667,  1.42857143],
       [ 3.66666667,  0.14285714],
       [ 0.66666667,  0.14285714],
       [ 3.66666667,  0.42857143],
       [ 2.66666667,  1.42857143],
       [ 3.33333333,  1.28571429],
       [ 3.        ,  0.28571429],
       [ 1.66666667,  0.14285714],
       [ 3.        ,  1.42857143],
       [ 0.33333333,  0.85714286],
       [ 2.33333333,  0.42857143],
       [ 4.        ,  0.71428571],
       [ 2.        ,  0.85714286],
       [ 3.33333333,  0.57142857],
       [ 3.66666667,  1.        ],
       [ 1.33333333,  0.85714286]])

In [534]:
def kohonen(E, ccount, sig1, sig2, eta1, eta2, delta, maxepochs=500):
    epochs = 0
    W = random.sample(list(E), ccount)
    sig = sig1
    eta = eta1
    while epochs < maxepochs:
        epochs += 1
        for e in E:
            best = min([(i[0], np.linalg.norm(e - W[i[0]]))
                         for i in enumerate(W)], key=lambda t: t[1])[0]
            for w in W:
                dist = abs(W[best][0] - w[0]) + abs(W[best][1] - w[1])
                nfactor = math.exp(-((dist*dist)/(2*delta*delta)))
                W[best] = W[best] + eta * (e - W[best]) * nfactor
        sig += sig2 - sig * delta
        eta += eta2 - eta * delta
        
    return W

data = kohonen(data, 4, 1, 0.2, 0.1, 0.01, 0.01)
data


Out[534]:
[array([ 2.99798516,  1.42857643]),
 array([ 1.33732748,  0.85712754]),
 array([ 0.34126195,  0.85287637]),
 array([ 3.66469926,  0.99744529])]

In [537]:
data = np.array(data).T
print(data * np.max(np.abs(np.array(data_perm)[0]),axis=0))

plt.plot(data[0], data[1], 'ro', color='yellow')
plt.plot(first[0], first[1], 'ro', color='red')
plt.plot(second[0], second[1], 'ro', color='green')
plt.plot(third[0], third[1], 'ro', color='blue')
plt.plot(forth[0], forth[1], 'ro', color='black')
plt.show()
data


[[ 35.9758219   16.04792976   4.09514337  43.97639106]
 [ 17.14291721  10.28553043  10.23451645  11.96934349]]
Out[537]:
array([[ 2.99798516,  1.33732748,  0.34126195,  3.66469926],
       [ 1.42857643,  0.85712754,  0.85287637,  0.99744529]])

Diemžēl man nesanāca normalizēt vērības atpakaļ, jo saskāros ar kaut ko dīvainu reizinot numpy masīvus.