In [92]:
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
import warnings
from matplotlib import style
from collections import Counter
style.use('fivethirtyeight')

%matplotlib notebook

In [93]:
# Sample feature sets: each key is a group label and maps to that group's
# 2-D points. The labels are also passed to matplotlib as the `color`
# argument, so they must be valid colour specifiers.
dataset = {'k':[[1,2],[2,3],[3,1]], 'r':[[6,5],[7,7],[8,6]]}
new_features = [5,7]

# Draw one scatter collection per group (an explicit loop instead of a
# comprehension that was evaluated only for its side effects).
for label, points in dataset.items():
    xs, ys = zip(*points)
    plt.scatter(xs, ys, s=100, color=label)

# The point to classify is drawn without an explicit colour.
plt.scatter(new_features[0], new_features[1], s=100)
plt.show()



In [102]:
def k_nearest_neighbors(data,predict,k=3):
    """Classify ``predict`` by majority vote among its nearest neighbours.

    Parameters
    ----------
    data : dict
        Maps each group label to a list of feature vectors in that group.
    predict : sequence of numbers
        Feature vector to classify; it is subtracted element-wise from every
        vector in ``data``, so the lengths have to match.
    k : int, optional
        Number of nearest neighbours that take part in the vote (default 3).

    Returns
    -------
    tuple
        ``(label, confidence)`` where ``label`` is the most common group
        among the neighbours that voted and ``confidence`` is the fraction
        of those votes that went to ``label``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, or ``data`` holds no feature vectors.

    Warns
    -----
    UserWarning
        When ``k`` does not exceed the number of groups in ``data``.
    """
    # A non-positive k would either leave no voters (k == 0) or silently
    # slice from the wrong end of the sorted list (k < 0).
    if k < 1:
        raise ValueError('k must be at least 1')

    if len(data) >= k:
        warnings.warn('K is set to a value less than total voting groups')

    # Convert the query point once instead of once per training point.
    target = np.array(predict)

    # Euclidean distance from the query point to every known point,
    # remembered together with the group the point belongs to.
    distances = []
    for group in data:
        for features in data[group]:
            euclidean_distance = np.linalg.norm(np.array(features) - target)
            distances.append([euclidean_distance, group])

    # Group labels of the k closest points (fewer if data has fewer points).
    votes = [group for _, group in sorted(distances)[:k]]
    if not votes:
        raise ValueError('data contains no feature vectors to vote with')

    # Winning label and the number of votes it received.
    vote_result, winning_votes = Counter(votes).most_common(1)[0]

    # Share of the votes actually cast that went to the winner. This equals
    # winning_votes / k whenever at least k points were available.
    confidence = winning_votes / len(votes)

    return vote_result, confidence

In [103]:
# Classify the new point using its 3 nearest neighbours.
result, confidence = k_nearest_neighbors(dataset, new_features, k=3)

# Redraw the known groups (one scatter collection per group), then draw the
# new point using the predicted group label as its colour.
for label, points in dataset.items():
    xs, ys = zip(*points)
    plt.scatter(xs, ys, s=100, color=label)
plt.scatter(new_features[0], new_features[1], s=100, color=result)
plt.show()

print(confidence)


2
1.0

In [ ]:


In [ ]: