In [102]:
import numpy as np
import pandas as pd
from matplotlib import pylab as plt
from collections import Counter
%matplotlib inline
In [190]:
# Read the space-delimited table at data/iris.txt and keep a random subset of 60 rows.
# random_state pins the draw so that Restart & Run All always yields the same subset
# (and therefore the same prediction further down).
df = pd.read_csv("data/iris.txt", delimiter=" ").sample(60, random_state=42)
In [191]:
def knn_learn(X, Y, predictX, k=3):
    """Predict the label of ``predictX`` by majority vote of its k nearest rows of ``X``.

    The distance between ``predictX`` and a row of ``X`` is the sum of squared
    differences over the columns (squared Euclidean distance).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training rows. Converted to a float ndarray, so ``np.matrix``,
        integer arrays and nested lists are all accepted.
    Y : array-like with n_samples entries
        Label of each row of ``X``. It is flattened, so a column vector
        works as well. Labels that can be converted to float are (as in the
        earlier implementation); labels that cannot, e.g. class names, are
        voted on unchanged.
    predictX : array-like
        The sample to classify; it must broadcast against the rows of ``X``.
    k : int, default 3
        Number of neighbours that vote. Values larger than n_samples are
        capped at n_samples so that no row can vote more than once.

    Returns
    -------
    The most common label among the k nearest rows. If several labels
    receive the same number of votes, the one that belongs to the closest
    neighbour wins.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array, if ``Y`` does not hold one
        label per row of ``X``, or if ``k`` is smaller than 1.
    """
    features = np.asarray(X, dtype=float)
    labels = np.asarray(Y).ravel()
    query = np.asarray(predictX, dtype=float)

    if features.ndim != 2 or features.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
    n_samples = features.shape[0]
    if labels.shape[0] != n_samples:
        raise ValueError("Y must contain exactly one label per row of X")
    if k < 1:
        raise ValueError("k must be at least 1")
    # More neighbours than rows would let a single row vote several times.
    k = min(k, n_samples)

    try:
        # Numeric labels keep coming back as floats, as callers already expect.
        labels = labels.astype(float)
    except (TypeError, ValueError):
        # Non-numeric labels (e.g. class names) are simply voted on as they are.
        pass

    # Squared Euclidean distance from the query to every training row.
    distances = np.sum((features - query) ** 2, axis=1)

    # One stable sort replaces the repeated linear scans. Sorting the reversed
    # array makes equal distances resolve to the highest row index first,
    # which is the order the previous selection loop produced.
    reversed_order = np.argsort(distances[::-1], kind="stable")
    nearest = (n_samples - 1 - reversed_order)[:k]

    # Labels are counted nearest-first, so Counter's first-seen ordering
    # breaks vote ties in favour of the closest neighbour.
    votes = Counter(labels[nearest]).most_common()
    return votes[0][0]
In [192]:
# Plain ndarrays are used instead of np.mat, which is no longer available in
# the main NumPy namespace as of NumPy 2.0.
x = df.iloc[:, :4].to_numpy(dtype=float)  # first four columns: the features
y = df.iloc[:, 4].to_numpy()              # fifth column: the label of each row
px = np.array([6.7, 3.3, 5.7, 2.5])       # the sample to classify
knn_learn(x, y, px)
Out[192]:
In [ ]: