In [1]:
import numpy as np
from sklearn import datasets
# Load the iris dataset bundled with scikit-learn. `A` is the data matrix
# (one row per sample) and `B` is the classification target for each row.
dataset = datasets.load_iris()
A, B = dataset.data, dataset.target
In [2]:
def distance(point1, point2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    point1, point2 : array-like of numbers
        Coordinates of the two points (NumPy arrays, lists or tuples).
        Their shapes must match or be broadcastable against each other.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Convert to float64 before subtracting: plain Python lists do not
    # support ``-``, and unsigned-integer arrays would silently wrap around
    # (e.g. uint8 0 - 20 == 236), producing a wrong distance.
    diff = np.asarray(point1, dtype=np.float64) - np.asarray(point2, dtype=np.float64)
    return np.sqrt(np.sum(np.square(diff)))
In [3]:
import operator
def findNeighbour(X_train, Y_train, X_test_instance, k=5):
    """Return the labels of the ``k`` training points closest to a query point.

    Parameters
    ----------
    X_train : sequence of points
        Training points; each is compared to the query with ``distance``.
    Y_train : sequence
        Label of each training point, indexed in step with ``X_train``.
    X_test_instance : point
        The query point whose neighbours are wanted.
    k : int, optional
        Number of neighbours to return (default 5, the value that used to be
        hard-coded). If fewer than ``k`` training points exist, all of them
        are returned.

    Returns
    -------
    list
        Labels of the nearest training points, closest first. Points at the
        same distance keep their training order because the sort is stable.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``X_train`` is empty.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got {0}".format(k))
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one point")

    # Pair every training label with its distance to the query point.
    distances = [
        (Y_train[i], distance(train_point, X_test_instance))
        for i, train_point in enumerate(X_train)
    ]
    distances.sort(key=operator.itemgetter(1))

    # Slicing caps the result at the number of available points, so a small
    # training set no longer triggers an IndexError.
    return [label for label, _ in distances[:k]]
In [4]:
def findClass(neighbour_class):
    """Return the most frequent label among the neighbours (majority vote).

    Parameters
    ----------
    neighbour_class : sequence of hashable labels
        Labels of the nearest neighbours, e.g. the list returned by
        ``findNeighbour``.

    Returns
    -------
    label
        The label with the highest count. When several labels share the
        highest count, the one that appears first in ``neighbour_class``
        wins.

    Raises
    ------
    ValueError
        If ``neighbour_class`` is empty, since there is nothing to vote on.
    """
    if len(neighbour_class) == 0:
        raise ValueError("neighbour_class must contain at least one label")

    class_count = {}
    for label in neighbour_class:
        class_count[label] = class_count.get(label, 0) + 1

    # Dicts preserve insertion order and max() returns the first maximal
    # key it meets, so ties resolve to the label that was seen first.
    return max(class_count, key=class_count.get)
In [5]:
import random
# Randomly partition the samples: each one goes to the training set with
# probability `split`, otherwise to the test set.
SEED = 42  # fixed so that Restart & Run All reproduces the same partition
split = 0.67
length = len(A)

# A private generator keeps the split reproducible without touching the
# state of the global `random` module.
split_rng = random.Random(SEED)

X_train = []
Y_train = []
X_test = []
Y_test = []
for features, label in zip(A, B):
    if split_rng.random() < split:
        X_train.append(features)
        Y_train.append(label)
    else:
        X_test.append(features)
        Y_test.append(label)

# The accuracy computation divides by the test-set size and the neighbour
# search needs training points, so fail early if either side is empty.
assert X_train and X_test, "train/test split produced an empty partition"
In [6]:
# Predict a label for every test sample: collect the labels of its nearest
# training points, then take the majority vote among them.
len_train, len_test = len(X_train), len(X_test)
predictions = [
    findClass(findNeighbour(X_train, Y_train, sample))
    for sample in X_test
]
In [7]:
# Accuracy: percentage of test samples whose predicted label equals the
# true label.
count_correct = sum(
    1 for predicted, actual in zip(predictions, Y_test) if predicted == actual
)
accuracy = count_correct * 100 / len_test
print("Accuracy : {0:.2f} %".format(accuracy))
In [ ]: