In [17]:
class KNN:
    def __init__(self):
        self.k = 10
    
    def set(self, k):
        self.k = k
    
    def euc_dist(self, a, b, dim):
        return sum([(a[i] - b[i]) ** 2 for i in range(dim)])
        
    # one test -> all trains
    def get_k_neighbors(self, trains, test):
        dims = len(test) - 1
        dists = []
        for i in range(len(trains)):
            dist = self.euc_dist(trains[i], test, dims)
            dists.append((trains[i], dist))
        dists.sort(key = lambda x: x[1])
        
        # get topk closest neighbors
        k_neis = []
        for i in range(self.k):
            k_neis.append(dists[i][0])
        return k_neis
    
    def get_top1_vote(self, k_neis):
        import collections
        class_vote = collections.defaultdict(int)
        for i in range(len(k_neis)):
            class_vote[k_neis[i][-1]] += 1
        sorted_vote = sorted(class_vote.items(), key = lambda x: -x[1])
        return sorted_vote[0][0]
    
    def get_accuracy(self, tests, pres):
        correct = sum([1 for a, b in zip(tests, pres) if a == b])
        return correct * 1.0 / len(tests)

In [19]:
trains = [(1, 1, 0), (2, 2, 0), (1, 2, 1), (2, 1, 1),
          (-3, -3, 2), (-4, -4, 2), (-3, -4, 3), (-4, -3, 3)
          ]
tests = [(1, 1, 0), (2, 2, 0), (1, 2, 1), (2, 1, 1)]

kn = KNN()
kn.set(2)
pres = []
for i in range(len(tests)):
    k_neis = kn.get_k_neighbors(trains, tests[i])
    res = kn.get_top1_vote(k_neis)
    pres.append(res)
    print('predicted=' + str(res) + ', actual = ' + str(tests[i][-1]))

accuracy = kn.get_accuracy(tests, pres)
print('Accuracy: ' + str(accuracy))


predicted=0, actual = 0
predicted=0, actual = 0
predicted=0, actual = 1
predicted=0, actual = 1
Accuracy: 0.0

In [ ]:


In [ ]: