In [1]:
import numpy as np
import operator
In [2]:
# Toy training set used to exercise the k-NN classifier below.
def createDataset():
    """Return a small two-feature dataset together with its class labels.

    Returns:
        tuple: ``(dataset, labels)`` where ``dataset`` is a ``(4, 2)`` float
        NumPy array (one row per sample) and ``labels`` is a plain list of
        four floats giving the class of the row at the same position.
    """
    # Each entry pairs a feature row with the class it belongs to.
    rows = (
        ((1.0, 1.1), 1.0),
        ((1.0, 1.0), 1.0),
        ((0, 0), 0.0),
        ((0, 0.1), 0.0),
    )
    features = np.array([list(point) for point, _ in rows])
    classes = [label for _, label in rows]
    return features, classes
In [3]:
# Bind the toy feature matrix and its labels to notebook-level names used by later cells.
dataset, labels = createDataset()
In [4]:
# Display the feature matrix returned by createDataset().
dataset
Out[4]:
In [5]:
# Display the class labels returned by createDataset().
labels
Out[5]:
In [6]:
def knn(x, dataset, labels, k):
    """Classify ``x`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean. Ties in the vote are resolved in favour of the
    smallest label (the same direction ``np.argmax`` breaks ties in a
    per-class count array).

    Args:
        x: Query point; array-like with one value per feature.
        dataset: Array-like of shape ``(n_samples, n_features)``.
        labels: Sequence of ``n_samples`` class labels, aligned with the rows
            of ``dataset``. Labels may be any values ``np.unique`` can sort
            (ints, floats, strings); they do not have to be ``0..k-1``.
        k: Number of neighbours that vote. Values larger than the number of
            samples are clamped to the number of samples.

    Returns:
        The winning label, taken from ``labels`` (as a NumPy scalar). For
        integer-like class labels this compares equal to the class index.

    Raises:
        ValueError: If the dataset is empty, ``k`` is smaller than 1, or
            ``labels`` does not have one entry per dataset row.
    """
    points = np.asarray(dataset, dtype=float)
    query = np.asarray(x, dtype=float)
    label_arr = np.asarray(labels)

    n_samples = points.shape[0] if points.ndim else 0
    if n_samples == 0:
        raise ValueError("dataset must contain at least one point")
    if label_arr.shape[0] != n_samples:
        raise ValueError("labels must have exactly one entry per dataset row")
    if k < 1:
        raise ValueError("k must be a positive integer")
    k = min(k, n_samples)

    # Euclidean distance between x and every point in the dataset.
    distance = np.sqrt(((points - query) ** 2).sum(axis=1))

    # Indices of the k closest points. Sorting indices (instead of sorting a
    # stacked distance/label array column-wise) keeps every distance paired
    # with its own label; the stable sort makes equal distances deterministic.
    nearest = np.argsort(distance, kind="stable")[:k]

    # Count the votes per distinct label among the neighbours and return the
    # most frequent label itself rather than an index into a count array.
    candidates, votes = np.unique(label_arr[nearest], return_counts=True)
    return candidates[np.argmax(votes)]
In [7]:
# Classify the query point [0, 0] against the toy dataset using k = 2 neighbours.
knn([0,0], dataset, labels, 2)
Out[7]:
In [ ]: