In [1]:
import numpy as np
import pandas as pd
import pylab as pl
from sklearn.neighbors import KNeighborsClassifier
In [2]:
# Sweep odd k from 1 to 49 for a k-nearest-neighbours classifier on the wine
# CSV and plot hold-out accuracy against k.
# NOTE(review): this same sweep appears in several cells of this notebook;
# consider keeping a single copy.
df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")

# Seeded generator so the random split (and every accuracy printed below) is
# reproducible across kernel restarts.
rng = np.random.RandomState(42)

# True marks a row held out for testing (each row is selected with
# probability 0.3). The remaining rows form the training set; previously the
# two subsets were swapped, so the model trained on the small 30% slice.
test_idx = rng.uniform(0, 1, len(df)) <= 0.3
test = df[test_idx]
train = df[~test_idx]

features = ['density', 'sulphates', 'residual_sugar']
target = 'high_quality'

# Collect [k, accuracy] rows in a plain list and build the DataFrame once.
records = []
for n in range(1, 51, 2):
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(train[features], train[target])
    preds = clf.predict(test[features])
    # Fraction of test rows whose predicted label equals the true label.
    accuracy = float(np.mean(preds == test[target].values))
    # "%.3f" (three decimals); the earlier "%3f" only set a minimum width.
    print("Neighbors: %d, Accuracy: %.3f" % (n, accuracy))
    records.append([n, accuracy])

results = pd.DataFrame(records, columns=["n", "accuracy"])

pl.plot(results.n, results.accuracy)
pl.title("Accuracy with Increasing K")
pl.xlabel("Number of neighbors (k)")
pl.ylabel("Test accuracy")
pl.show()
In [ ]:
import numpy as np
# Sweep odd k from 1 to 49 for a k-nearest-neighbours classifier on the wine
# CSV and plot hold-out accuracy against k.
# NOTE(review): this same sweep appears in several cells of this notebook;
# consider keeping a single copy.
df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")

# Seeded generator so the random split (and every accuracy printed below) is
# reproducible across kernel restarts.
rng = np.random.RandomState(42)

# True marks a row held out for testing (each row is selected with
# probability 0.3). The remaining rows form the training set; previously the
# two subsets were swapped, so the model trained on the small 30% slice.
test_idx = rng.uniform(0, 1, len(df)) <= 0.3
test = df[test_idx]
train = df[~test_idx]

features = ['density', 'sulphates', 'residual_sugar']
target = 'high_quality'

# Collect [k, accuracy] rows in a plain list and build the DataFrame once.
records = []
for n in range(1, 51, 2):
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(train[features], train[target])
    preds = clf.predict(test[features])
    # Fraction of test rows whose predicted label equals the true label.
    accuracy = float(np.mean(preds == test[target].values))
    # "%.3f" (three decimals); the earlier "%3f" only set a minimum width.
    print("Neighbors: %d, Accuracy: %.3f" % (n, accuracy))
    records.append([n, accuracy])

results = pd.DataFrame(records, columns=["n", "accuracy"])

pl.plot(results.n, results.accuracy)
pl.title("Accuracy with Increasing K")
pl.xlabel("Number of neighbors (k)")
pl.ylabel("Test accuracy")
pl.show()
In [ ]:
# IPython magic: use matplotlib's inline backend so figures render in the cell output.
# NOTE(review): this cell comes after earlier plotting cells; it would normally
# sit with the imports at the top of the notebook.
%matplotlib inline
In [ ]:
# Sweep odd k from 1 to 49 for a k-nearest-neighbours classifier on the wine
# CSV and plot hold-out accuracy against k.
# NOTE(review): this same sweep appears in several cells of this notebook;
# consider keeping a single copy.
df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")

# Seeded generator so the random split (and every accuracy printed below) is
# reproducible across kernel restarts.
rng = np.random.RandomState(42)

# True marks a row held out for testing (each row is selected with
# probability 0.3). The remaining rows form the training set; previously the
# two subsets were swapped, so the model trained on the small 30% slice.
test_idx = rng.uniform(0, 1, len(df)) <= 0.3
test = df[test_idx]
train = df[~test_idx]

features = ['density', 'sulphates', 'residual_sugar']
target = 'high_quality'

# Collect [k, accuracy] rows in a plain list and build the DataFrame once.
records = []
for n in range(1, 51, 2):
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(train[features], train[target])
    preds = clf.predict(test[features])
    # Fraction of test rows whose predicted label equals the true label.
    accuracy = float(np.mean(preds == test[target].values))
    # "%.3f" (three decimals); the earlier "%3f" only set a minimum width.
    print("Neighbors: %d, Accuracy: %.3f" % (n, accuracy))
    records.append([n, accuracy])

results = pd.DataFrame(records, columns=["n", "accuracy"])

pl.plot(results.n, results.accuracy)
pl.title("Accuracy with Increasing K")
pl.xlabel("Number of neighbors (k)")
pl.ylabel("Test accuracy")
pl.show()
In [ ]: