notebook.community

Edit and run



In [102]:

    
import numpy as np
import pandas as pd
from matplotlib import pylab as plt
from collections import Counter
%matplotlib inline



In [190]:

    
# Load the space-delimited iris table and keep a random 60-row subset.
# random_state is fixed so the same rows are drawn on every
# Restart & Run All; without it each run classifies against a different subset.
df = pd.read_csv("data/iris.txt", delimiter=" ").sample(n=60, random_state=0)



In [191]:

    
def knn_learn(X, Y, predictX, k=3):
    """Classify one query point by majority vote among its k nearest rows of X.

    Nearness is the squared Euclidean distance between ``predictX`` and each
    row of ``X``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training features. ndarray, ``np.matrix`` and DataFrame inputs are
        all converted to a float ndarray first.
    Y : array-like holding n_samples numeric labels
        Any shape that flattens to ``n_samples`` values (1-D, column vector,
        ``np.matrix`` column, ...).
    predictX : array-like, shape (n_features,) or (1, n_features)
        The point to classify.
    k : int, default 3
        Number of neighbours that vote. Values larger than ``n_samples`` are
        clamped to ``n_samples`` so no row can vote more than once.

    Returns
    -------
    numpy.float64
        The most common label among the k nearest rows. When several labels
        have the same vote count, the one whose first supporter is nearest
        to the query wins.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array, if ``Y`` does not hold one
        label per row of ``X``, or if ``k`` is smaller than 1.
    """
    # Plain float ndarrays keep the maths independent of the input container
    # (np.matrix would keep distances 2-D) and of the input dtype.
    features = np.asarray(X, dtype=float)
    labels = np.asarray(Y, dtype=float).ravel()
    query = np.asarray(predictX, dtype=float)

    if features.ndim != 2 or features.shape[0] == 0:
        raise ValueError(
            "X must be a non-empty 2-D array of shape (n_samples, n_features)")
    n_samples = features.shape[0]
    if labels.shape[0] != n_samples:
        raise ValueError("Y must contain exactly one label per row of X")
    if k < 1:
        raise ValueError("k must be at least 1")
    k = min(k, n_samples)

    # Squared Euclidean distance from the query to every training row.
    sq_dist = np.sum(np.power(features - query, 2), axis=1)

    # Row indices ordered nearest-first. Sorting the reversed array with a
    # stable sort and mapping the positions back makes equidistant rows come
    # out later-row-first, which is the order the previous linear scan
    # (`<=` comparison) produced.
    order = (n_samples - 1) - np.argsort(sq_dist[::-1], kind="stable")
    nearest_labels = labels[order[:k]]

    # Counter keeps first-seen order among equal counts, so vote ties are
    # resolved in favour of the label seen closest to the query.
    return Counter(nearest_labels).most_common(1)[0][0]



In [192]:

    
# Columns 0-3 are passed as features and column 4 as the label vector.
# Plain ndarrays are used instead of np.mat, which was removed in NumPy 2.0;
# the label vector is 1-D so each label is a scalar when indexed by row.
x = df.iloc[:, :4].to_numpy()
y = df.iloc[:, 4].to_numpy()
px = np.array([6.7, 3.3, 5.7, 2.5])
knn_learn(x, y, px)









    Out[192]:





3.0



In [ ]: