In [525]:
%matplotlib inline
In [527]:
import csv
# Read the tab-separated file and transpose it, so that every entry of `data`
# holds one column of the file with each field parsed as an integer.
with open('ld4/examples.txt') as f:
    data = [[int(cell) for cell in column]
            for column in zip(*csv.reader(f, delimiter='\t'))]
In [528]:
import matplotlib.pyplot as plt
# Scatter the raw samples as red circles: data[0] on the horizontal axis,
# data[1] on the vertical axis.
fig, ax = plt.subplots()
ax.plot(data[0], data[1], 'ro')
plt.show()
In [529]:
# back to (x, y): regroup the per-column lists into one row per sample
# numpy is imported here because this cell runs before the cell that
# imports it; without this line a fresh kernel fails with a NameError.
import numpy as np

data = np.array(list(zip(*data)))
In [530]:
import numpy as np
def cluster_points(X, mu):
    """Group the points of ``X`` by the index of their nearest centre in ``mu``.

    Distance is ``np.linalg.norm(point - centre)``; when several centres are
    equally close, the one with the lowest index is chosen.

    Returns:
        dict mapping a centre index to the list of points assigned to it.
        A centre that attracts no point does not appear as a key.
    """
    assignment = {}
    for point in X:
        nearest = min(range(len(mu)),
                      key=lambda idx: np.linalg.norm(point - mu[idx]))
        assignment.setdefault(nearest, []).append(point)
    return assignment
# collect the cluster centres
def centrs(clusters):
    """Return the mean point of every cluster, ordered by ascending cluster key.

    Args:
        clusters: dict mapping a cluster key to a list of points.

    Returns:
        list with one ``np.mean(points, axis=0)`` entry per key of ``clusters``.
    """
    return [np.mean(clusters[key], axis=0) for key in sorted(clusters)]
def converge(mu, oldmu):
    """Tell whether two collections of centres contain the same points.

    Each centre is turned into a tuple and the two resulting sets are compared,
    so neither ordering nor duplicates matter; coordinates must match exactly.
    """
    current = {tuple(centre) for centre in mu}
    previous = {tuple(centre) for centre in oldmu}
    return current == previous
def kmeans(E, ccount, maxepochs=500):
    """Cluster the points of ``E`` with k-means, starting from random samples.

    The initial centres are ``ccount`` distinct elements drawn from ``E`` with
    ``random.sample``. Each epoch replaces the centres with the means returned
    by ``centrs`` and re-assigns the points with ``cluster_points``. Iteration
    stops once ``converge`` reports that the centres no longer change, or after
    ``maxepochs`` updates, whichever comes first.

    Args:
        E: iterable of points (e.g. a 2-D numpy array, one row per point).
        ccount: number of initial centres.
        maxepochs: upper bound on the number of centre updates.

    Returns:
        ``(centers, clusters)`` where ``clusters`` maps an index into
        ``centers`` to the list of points assigned to that centre. A centre
        that attracts no point is dropped by ``cluster_points``/``centrs``, so
        fewer than ``ccount`` centres may be returned.

    Raises:
        ValueError: from ``random.sample`` if ``ccount`` exceeds ``len(E)``.
    """
    # Local import: no cell visible in this notebook imports `random`.
    import random

    centers = random.sample(list(E), ccount)
    # Assign once up front so `clusters` is always defined, even when the
    # loop body never runs (maxepochs == 0).
    clusters = cluster_points(E, centers)
    # A bounded loop enforces the cap; the previous condition
    # `not converge(...) or epochs > maxepochs` never limited the iterations.
    for _ in range(maxepochs):
        updated = centrs(clusters)
        if converge(updated, centers):
            break
        centers = updated
        clusters = cluster_points(E, centers)
    return (centers, clusters)
In [531]:
# Run k-means with four clusters, then transpose the centres and each cluster
# into per-coordinate tuples so they can be handed straight to plt.plot.
centers, clusters = kmeans(data, 4)
centroids = list(zip(*centers))
first = list(zip(*clusters[0]))
second = list(zip(*clusters[1]))
third = list(zip(*clusters[2]))
forth = list(zip(*clusters[3]))
In [532]:
# the yellow markers are the cluster centres
# Use the plain 'o' marker together with an explicit colour: the former 'ro'
# format string also encodes red, which clashes with the color= keyword and
# makes matplotlib warn that the colour is defined redundantly.
for series, colour in (
        (centroids, 'yellow'),
        (first, 'red'),
        (second, 'green'),
        (third, 'blue'),
        (forth, 'black'),
):
    plt.plot(series[0], series[1], 'o', color=colour)
plt.show()
In [533]:
# normalize data: divide every coordinate column by its largest absolute value
data_perm = data.T  # one row per coordinate; still refers to the unscaled values
print(data_perm)
# The scale is taken per row of data_perm (i.e. per coordinate column of data).
# The previous code took the maximum over data[0] and data[1], which are the
# first two samples rather than the two coordinate columns.
data = data / np.max(np.abs(data_perm), axis=1)
data
Out[533]:
In [534]:
def kohonen(E, ccount, sig1, sig2, eta1, eta2, delta, maxepochs=500):
    """Train ``ccount`` Kohonen units on the samples in ``E``.

    The units start as ``ccount`` distinct samples drawn with ``random.sample``.
    For every sample the closest unit (Euclidean norm) is the winner; every
    unit is then pulled towards the sample by
    ``eta * exp(-dist**2 / (2 * sig**2))``, where ``dist`` is the Manhattan
    distance between that unit and the winner measured before the update.
    After each epoch ``sig`` and ``eta`` move a fraction ``delta`` of the way
    towards ``sig2`` and ``eta2``.

    Args:
        E: iterable of sample vectors (e.g. a 2-D numpy array, one row each).
        ccount: number of units.
        sig1, sig2: initial and limiting neighbourhood width; must be non-zero.
        eta1, eta2: initial and limiting learning rate.
        delta: per-epoch decay fraction for ``sig`` and ``eta``.
        maxepochs: number of passes over ``E``.

    Returns:
        list of ``ccount`` numpy arrays, the trained unit weights.
    """
    # Local imports: no cell visible in this notebook imports these modules.
    import math
    import random

    # Float copies, so integer samples are not truncated by later updates.
    W = [np.array(w, dtype=float) for w in random.sample(list(E), ccount)]
    sig = sig1
    eta = eta1
    for _ in range(maxepochs):
        for e in E:
            best = min(range(len(W)), key=lambda j: np.linalg.norm(e - W[j]))
            # Updates below rebind W[j] to new arrays, so `winner` keeps the
            # winner's position from before this sample's update.
            winner = W[best]
            for j, w in enumerate(W):
                dist = np.abs(winner - w).sum()
                # The neighbourhood width is sig; the previous code used delta
                # here and left sig unused.
                nfactor = math.exp(-(dist * dist) / (2 * sig * sig))
                # Move unit j itself; the previous code moved W[best] on every
                # pass of this loop, so neighbours were never updated.
                W[j] = w + eta * (e - w) * nfactor
        # NOTE(review): assumed intent is a decay from sig1/eta1 towards
        # sig2/eta2; the unparenthesised form made eta grow to eta2 / delta.
        sig += (sig2 - sig) * delta
        eta += (eta2 - eta) * delta
    return W
# Train four units on `data`; the name `data` is rebound to the list of unit
# weight vectors returned by kohonen.
data = kohonen(data, ccount=4, sig1=1, sig2=0.2, eta1=0.1, eta2=0.01, delta=0.01)
data
Out[534]:
In [537]:
data = np.array(data).T  # one row per coordinate, one column per Kohonen unit
# Undo the normalisation: multiply each coordinate row by that coordinate's
# largest absolute raw value. `scale` has one entry per coordinate, so it needs
# a trailing axis to broadcast across the columns of `data`; multiplying by a
# single scalar (the x scale) rescaled both coordinates by the same factor.
# NOTE(review): assumes each coordinate was normalised by max(abs(...)) of the
# matching row of data_perm - confirm against the normalisation cell.
scale = np.max(np.abs(np.array(data_perm)), axis=1)
kohonen_centers = data * scale[:, np.newaxis]
print(kohonen_centers)
# Plot the de-normalised centres so they share the scale of the raw clusters.
# Plain 'o' marker + color=: the former 'ro' format string also encodes red,
# which clashes with the color= keyword.
for series, colour in (
        (kohonen_centers, 'yellow'),
        (first, 'red'),
        (second, 'green'),
        (third, 'blue'),
        (forth, 'black'),
):
    plt.plot(series[0], series[1], 'o', color=colour)
plt.show()
data
Out[537]:
Diemžēl man nesanāca normalizēt vērtības atpakaļ, jo saskāros ar kaut ko dīvainu, reizinot numpy masīvus.