In [ ]:


In [ ]:
def knn(X, dataSet, k):
    """Classify a point by majority vote among its k nearest neighbours.

    Parameters
    ----------
    X : mapping or pandas.Series
        Query point. It must be indexable by every column name of
        ``dataSet`` (for example ``{"x": 3, "y": 7}``).
    dataSet : pandas.DataFrame
        Known points, one row per point. The index holds each row's class
        label and every column is a numeric coordinate.
    k : int
        Number of nearest neighbours that vote. A value larger than the
        number of rows simply lets every row vote.

    Returns
    -------
    The label that occurs most often among the ``k`` closest rows. On a tie
    the tied label whose first neighbour is closest wins (this relies on
    insertion-ordered dicts, i.e. Python 3.7+).

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``dataSet`` has no rows or no columns.
    """
    dataSetSize = dataSet.shape[0]
    if dataSetSize == 0 or dataSet.shape[1] == 0:
        raise ValueError("dataSet must contain at least one point and one coordinate column")
    if k < 1:
        raise ValueError("k must be a positive integer")

    #distance calculation
    # Euclidean distance, accumulated column by column so that X only needs
    # to support lookup by column name (dict and Series both work).
    squared = 0
    for column in dataSet.columns:
        squared = squared + (dataSet[column] - X[column]) ** 2
    distances = squared ** 0.5

    #sort results
    # A stable sort keeps equally distant points in their original row
    # order, which makes the result deterministic.
    distances = distances.sort_values(kind="mergesort")

    #vote using k lowest distances
    top_labels = distances.head(k).index.tolist()
    label_counts = {}
    for label in top_labels:
        label_counts[label] = label_counts.get(label, 0) + 1
    return max(label_counts, key=label_counts.get)

In [ ]:
# Fix the seeds so that "Restart Kernel -> Run All" reproduces the same points.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Build a balanced, shuffled list of class labels.
CLASS_NAMES = ["A", "B", "C", "D"]
POINTS_PER_CLASS = 25
labels = CLASS_NAMES * POINTS_PER_CLASS
random.shuffle(labels)

#Generate random points
# N is derived from the labels so the number of rows and the number of index
# labels can never disagree when the constants above are changed.
N = len(labels)
data = DataFrame(np.random.randint(0, 100, size=(N, 2)), columns = ["x", "y"], index = labels)

#Define a test point
# Keyed by the same column names as `data`; placed in the middle of the
# 0-99 range the random points are drawn from.
test_point = {"x": 50, "y": 50}

In [ ]:
#Invoke the knn function

In [ ]:
# Plot the points
# One scatter series per class plus the test point, all on a single axes.
f = plt.figure()
ax = f.add_subplot(1, 1, 1)
ax.set_title("K-Nearest Neighbor")
ax.set_xlabel("x")
ax.set_ylabel("y")

# (class label, marker colour) pairs, in legend order.
class_colors = [("A", "Blue"), ("B", "Green"), ("C", "Red"), ("D", "Black")]

# .loc replaces DataFrame.ix, which was deprecated in pandas 0.20 and removed
# in 1.0; label-based selection of all rows of a class is what is wanted here.
class_handles = [
    ax.scatter(data.loc[label, "x"], data.loc[label, "y"], c=color, s=75)
    for label, color in class_colors
]
# Keep the per-class handle names the original cell exposed.
a, b, c, d = class_handles

point = ax.scatter(test_point["x"], test_point["y"], c="Yellow", s=100)

# Anchor the legend just outside the upper-right corner of the axes; the
# trailing semicolon suppresses the Legend repr in the cell output.
ax.legend((a,b,c,d, point), ("A", "B", "C", "D", "test point"), bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,scatterpoints=1);