Assignment 5

K Nearest Neighbours

Goal: classify iris flowers into three species (Setosa, Versicolour, and Virginica) using a k-nearest-neighbours classifier.


In [1]:
import numpy as np
from sklearn import datasets
# Load the Iris data set bundled with scikit-learn.
dataset = datasets.load_iris()
# A is taken from the bunch's `data` field, B from its `target` field.
A, B = dataset.data, dataset.target
Function to compute the Euclidean distance between two points

In [2]:
def distance(point1, point2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    point1, point2 : array-like
        Coordinates of the two points. NumPy arrays, lists and tuples are
        accepted; the two inputs must have compatible shapes.

    Returns
    -------
    float
        Square root of the sum of the squared coordinate differences.
    """
    # np.asarray leaves ndarrays untouched and lets plain sequences work too
    # (a bare ``point1 - point2`` raises TypeError for Python lists).
    diff = np.asarray(point1) - np.asarray(point2)
    return np.sum(np.square(diff)) ** 0.5
Function to find the classes of the k nearest neighbours

In [3]:
import operator
def findNeighbour(X_train, Y_train, X_test_instance, k=5):
    """Return the labels of the ``k`` training points nearest to a query point.

    Parameters
    ----------
    X_train : sequence
        Training points; each one is passed to ``distance`` together with
        ``X_test_instance``.
    Y_train : sequence
        Labels, indexed in parallel with ``X_train``.
    X_test_instance :
        The query point.
    k : int, optional
        Number of neighbours to return (default 5). If fewer than ``k``
        training points exist, all of them are returned.

    Returns
    -------
    list
        Labels of the nearest neighbours, ordered from closest to farthest.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Pair every training label with its distance to the query point.
    distances = [
        (Y_train[i], distance(sample, X_test_instance))
        for i, sample in enumerate(X_train)
    ]
    # list.sort is stable, so equidistant points keep their training order.
    distances.sort(key=operator.itemgetter(1))
    # Slicing caps k at the training-set size instead of raising IndexError.
    return [label for label, _ in distances[:k]]
Function to find the most common class among the neighbours (majority vote)

In [4]:
def findClass(neighbour_class):
    """Pick the label that occurs most often among the neighbour labels.

    When several labels share the highest count, the one that appears first
    in ``neighbour_class`` wins, because the sort below is stable.
    """
    tally = {}
    for label in neighbour_class:
        tally[label] = tally.get(label, 0) + 1
    # Highest count first; ties stay in first-seen order.
    ranked = sorted(tally.items(), key=lambda entry: entry[1], reverse=True)
    winner, _ = ranked[0]
    return winner
Split data into training and testing sets randomly

In [5]:
import random
# Randomly assign each sample to the training set with probability `split`
# and to the test set otherwise, so the set sizes vary around a 67/33 ratio.
# A dedicated, seeded generator makes the split (and therefore the reported
# accuracy) reproducible without touching the global `random` state.
SEED = 42
split_rng = random.Random(SEED)
X_train = []
Y_train = []
X_test = []
Y_test = []
length = len(A)
split = 0.67
for i in range(length):
    if split_rng.random() < split:
        X_train.append(A[i])
        Y_train.append(B[i])
    else:
        X_test.append(A[i])
        Y_test.append(B[i])
Making predictions

In [6]:
# Classify every held-out sample by majority vote among its nearest
# training neighbours.
len_train = len(X_train)
len_test = len(X_test)
predictions = [
    findClass(findNeighbour(X_train, Y_train, sample)) for sample in X_test
]
Accuracy of predictions

In [7]:
# Percentage of test samples whose predicted label equals the true label.
count_correct = sum(
    1 for predicted, actual in zip(predictions, Y_test) if predicted == actual
)
accuracy = 100 * count_correct / len_test
print("Accuracy : {0:.2f} %".format(accuracy))


Accuracy : 93.22 %

In [ ]: