In [17]:
class KNN:
    """Minimal k-nearest-neighbours classifier over labelled rows.

    Every row (training or test) is an indexable sequence whose last
    element is the class label and whose preceding elements are numeric
    features.
    """

    def __init__(self):
        # Default neighbourhood size; change it with set().
        self.k = 10

    def set(self, k):
        """Set the number of neighbours consulted per prediction."""
        self.k = k

    def euc_dist(self, a, b, dim):
        """Return the squared Euclidean distance over the first `dim` entries.

        No square root is taken; since sqrt is monotonic, the neighbour
        ranking built from this value is the same as for the true distance.
        """
        return sum((a[i] - b[i]) ** 2 for i in range(dim))

    def get_k_neighbors(self, trains, test):
        """Return the (at most) `self.k` training rows closest to `test`.

        One test row is compared against every training row.  The result is
        ordered nearest first; rows at equal distance keep their order from
        `trains`.  When fewer than `self.k` training rows exist, all of them
        are returned instead of raising IndexError.
        """
        # The last element of a row is its label, so it is not a feature.
        dims = len(test) - 1
        # sorted() is stable, which preserves the input order among ties.
        ranked = sorted(trains, key=lambda row: self.euc_dist(row, test, dims))
        # Clamp at 0 so a negative k yields no neighbours rather than
        # slicing from the end of the list.
        return ranked[:max(self.k, 0)]

    def get_top1_vote(self, k_neis):
        """Return the label (last element) that occurs most among `k_neis`.

        Ties go to the label encountered first.  Raises IndexError when
        `k_neis` is empty.
        """
        from collections import Counter

        votes = Counter(row[-1] for row in k_neis)
        return votes.most_common(1)[0][0]

    def get_accuracy(self, tests, pres):
        """Return the fraction of positions where `tests` and `pres` agree.

        The two sequences are compared element by element with `==`, so
        `tests` must hold the expected labels themselves, not whole rows.
        Returns 0.0 for empty input.
        """
        if len(tests) == 0:
            return 0.0
        correct = sum(1 for a, b in zip(tests, pres) if a == b)
        return float(correct) / len(tests)
In [19]:
# Demo: classify a few 2-D points with the KNN class defined above.
# Each row is (feature_1, feature_2, class_label).
trains = [
    (1, 1, 0), (2, 2, 0), (1, 2, 1), (2, 1, 1),
    (-3, -3, 2), (-4, -4, 2), (-3, -4, 3), (-4, -3, 3),
]
tests = [(1, 1, 0), (2, 2, 0), (1, 2, 1), (2, 1, 1)]

kn = KNN()
kn.set(2)

pres = []
for test in tests:
    k_neis = kn.get_k_neighbors(trains, test)
    res = kn.get_top1_vote(k_neis)
    pres.append(res)
    print('predicted=' + str(res) + ', actual = ' + str(test[-1]))

# get_accuracy compares its two sequences element by element, so it needs the
# true labels; passing whole rows would never equal a predicted label and
# would always report 0.0.
actuals = [row[-1] for row in tests]
accuracy = kn.get_accuracy(actuals, pres)
print('Accuracy: ' + str(accuracy))
In [ ]:
In [ ]: