In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.neighbors import KNeighborsClassifier
from sklearn import cross_validation
In [ ]:
import numpy as np
In [ ]:
# Load the wine data from a CSV file (path is relative to the working directory).
df = pd.read_csv("data/wine.csv")
In [ ]:
# Inspect the column labels of the loaded frame.
df.columns
In [ ]:
# List the distinct values of the high_quality column (used below as the label).
df.high_quality.unique()
In [ ]:
# Split three feature columns plus the label (high_quality, kept last) into
# train and test sets, with 75% of the rows going to train.
#
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; prefer its replacement and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

train, test = train_test_split(
    df[['density', 'sulphates', 'residual_sugar', 'high_quality']],
    train_size=0.75
)
In [ ]:
# Display the training split for a quick visual check.
train
In [ ]:
# Separate features (first three columns) from the label (fourth column).
# Recent scikit-learn returns DataFrames from train_test_split, and a
# DataFrame rejects tuple slicing like train[:, :3]; converting to an ndarray
# first works whether the split is a DataFrame or already an array.
train_values = np.asarray(train)
x_train = train_values[:, :3]
y_train = train_values[:, 3]
In [ ]:
# Separate features (first three columns) from the label (fourth column).
# Recent scikit-learn returns DataFrames from train_test_split, and a
# DataFrame rejects tuple slicing like test[:, :3]; converting to an ndarray
# first works whether the split is a DataFrame or already an array.
test_values = np.asarray(test)
x_test = test_values[:, :3]
y_test = test_values[:, 3]
In [ ]:
# Baseline model: classify each sample by its single nearest neighbour.
clf = KNeighborsClassifier(n_neighbors=1)
In [ ]:
# Fit the classifier on the training features and labels.
clf.fit(x_train,y_train)
In [ ]:
# Predict a label for every row of the held-out test features.
preds = clf.predict(x_test)
In [ ]:
# Accuracy = number of test rows predicted correctly / number of test rows.
# Summing the boolean match mask counts the correct predictions.
accuracy = np.sum(preds == y_test) / float(len(test))
In [ ]:
# Report the baseline accuracy. The parenthesised single-argument form is
# valid on both Python 2 and Python 3 (the bare print statement is a
# SyntaxError on Python 3).
# NOTE(review): "%3f" sets a minimum field width of 3, not 3 decimal places;
# "%.3f" may have been intended -- confirm before changing the output format.
print("Accuracy: %3f" % (accuracy,))
In [ ]:
# Sweep odd values of k from 1 to 49 with the default neighbour weighting,
# recording the test-set accuracy for each k, then plot accuracy against k.
results = []
for k in range(1, 51, 2):
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(x_train, y_train)
    preds = clf.predict(x_test)
    # Fraction of test rows whose predicted label matches the true label.
    accuracy = np.where(preds == y_test, 1, 0).sum() / float(len(test))
    # Parenthesised form prints identically on Python 2 and Python 3.
    print("Neighbors: %d, Accuracy: %3f" % (k, accuracy))
    results.append([k, accuracy])

# Turn the collected [k, accuracy] pairs into a frame for plotting.
results = pd.DataFrame(results, columns=["k", "accuracy"])
plt.plot(results.k, results.accuracy)
plt.title("Accuracy with Increasing K")
plt.show()
In [ ]:
# Same sweep over odd k from 1 to 49, but with weights='distance' passed to
# the classifier; records test-set accuracy per k and plots it against k.
results = []
for k in range(1, 51, 2):
    clf = KNeighborsClassifier(n_neighbors=k, weights='distance')
    clf.fit(x_train, y_train)
    preds = clf.predict(x_test)
    # Fraction of test rows whose predicted label matches the true label.
    accuracy = np.where(preds == y_test, 1, 0).sum() / float(len(test))
    # Parenthesised form prints identically on Python 2 and Python 3.
    print("Neighbors: %d, Accuracy: %3f" % (k, accuracy))
    results.append([k, accuracy])

# Turn the collected [k, accuracy] pairs into a frame for plotting.
results = pd.DataFrame(results, columns=["k", "accuracy"])
plt.plot(results.k, results.accuracy)
plt.title("Accuracy with Increasing K")
plt.show()