In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import pylab as pl
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

In [3]:
df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")
 
test_idx = np.random.uniform(0, 1, len(df)) <= 0.3
train = df[test_idx==True]
test = df[test_idx==False]

features = ['density', 'sulphates', 'residual_sugar']
 
results = []
for n in range(1, 51, 2):
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(train[features], train['high_quality'])
    preds = clf.predict(test[features])
    accuracy = np.where(preds==test['high_quality'], 1, 0).sum() / float(len(test))
    print("Neighbors: %d, Accuracy: %3f" % (n, accuracy))
 
    results.append([n, accuracy])
 
results = pd.DataFrame(results, columns=["n", "accuracy"])
 
pl.plot(results.n, results.accuracy)
pl.title("Accuracy with Increasing K")
pl.show()


Neighbors: 1, Accuracy: 0.748030
Neighbors: 3, Accuracy: 0.768608
Neighbors: 5, Accuracy: 0.791813
Neighbors: 7, Accuracy: 0.791594
Neighbors: 9, Accuracy: 0.794221
Neighbors: 11, Accuracy: 0.792688
Neighbors: 13, Accuracy: 0.795096
Neighbors: 15, Accuracy: 0.800350
Neighbors: 17, Accuracy: 0.803853
Neighbors: 19, Accuracy: 0.802977
Neighbors: 21, Accuracy: 0.802539
Neighbors: 23, Accuracy: 0.802758
Neighbors: 25, Accuracy: 0.803853
Neighbors: 27, Accuracy: 0.803415
Neighbors: 29, Accuracy: 0.803415
Neighbors: 31, Accuracy: 0.803415
Neighbors: 33, Accuracy: 0.803196
Neighbors: 35, Accuracy: 0.803415
Neighbors: 37, Accuracy: 0.803415
Neighbors: 39, Accuracy: 0.803415
Neighbors: 41, Accuracy: 0.803415
Neighbors: 43, Accuracy: 0.803415
Neighbors: 45, Accuracy: 0.803415
Neighbors: 47, Accuracy: 0.803415
Neighbors: 49, Accuracy: 0.803415

In [ ]: