k-nearest neighbors algorithm


In [1]:
import numpy as np
import operator

In [2]:
#create dataset
def createDataset():
    dataset = np.array([[1.0,1.1], [1.0,1.0], [0,0], [0,0.1]])
    labels = [1., 1., 0., 0.]
    return dataset, labels

In [3]:
dataset, labels = createDataset()

In [4]:
dataset


Out[4]:
array([[ 1. ,  1.1],
       [ 1. ,  1. ],
       [ 0. ,  0. ],
       [ 0. ,  0.1]])

In [5]:
labels


Out[5]:
[1.0, 1.0, 0.0, 0.0]

In [6]:
def knn(x, dataset, labels, k):
    ##計算x與各點之間距離
    distance = (((dataset - x) ** 2).sum(axis=1)) ** 0.5
    ##創造候選人array
    ##將距離和label結合
    distance = np.vstack((distance,labels)).T
    
    #初始值
    candidate = np.sort(distance[:k], axis=0)
        
    for i in range(k, len(distance)):
        #print candidate
        if distance[i][0] < candidate[:,0].any():
            ##把最大的數踢走,加入剛剛小的數字
            candidate = np.sort(np.vstack((candidate[:-1,:], distance[i])), axis=0)
                     
    result=np.zeros(len(candidate))
    for i in candidate[:,1]:
        result[i] += 1

    return np.argmax(result)

In [7]:
knn([0,0], dataset, labels, 2)


/Library/Python/2.7/site-packages/ipykernel/__main__.py:19: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
Out[7]:
0

In [ ]: