In [1]:
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
In [2]:
# k-nearest-neighbours classifier: vote among the 3 closest training points,
# using the Minkowski distance with power parameter p=2.
knn = KNeighborsClassifier(
    n_neighbors=3,
    p=2,
)
In [7]:
# Locations of the comma-separated training and test sets, relative to the
# notebook's working directory.
path_to_training_data = "../data/training.txt"
path_to_test_data = "../data/test.txt"
In [8]:
# header=None: no row is treated as column names, so pandas labels the
# columns with integers (0, 1, 2, ...).
train = pd.read_csv(
    path_to_training_data,
    sep=",",
    header=None,
)
test = pd.read_csv(
    path_to_test_data,
    sep=",",
    header=None,
)
In [9]:
# Inspect the column labelled 4 of the test set (the column used as the
# target further down). `.ix` was removed in pandas 1.0; `.loc` is the
# label-based replacement.
test.loc[:, 4]
Out[9]:
In [14]:
# Split each frame into features (columns labelled 0-3) and target (column 4).
# `.ix` was removed in pandas 1.0. Because the frames were read with
# header=None the column labels are integers, so `.ix` selected by label and
# `.loc` is the exact equivalent; `.loc` label slices include the end label,
# hence `:3` selects columns 0, 1, 2 and 3.
x_train = train.loc[:, :3]
x_test = test.loc[:, :3]
y_train = train.loc[:, 4]
y_test = test.loc[:, 4]
In [16]:
# Fit the classifier on the training features and their labels; the fitted
# estimator is the cell's displayed value.
knn.fit(X=x_train, y=y_train)
Out[16]:
In [19]:
# Predict a label for every row of the test features.
y_predicted = knn.predict(X=x_test)
In [18]:
def calculate_accuracy(y_true, y_predicted):
    """Return the percentage of predictions that equal the true labels.

    Elements are compared position by position.

    Parameters
    ----------
    y_true : sequence
        Ground-truth labels (list, NumPy array, pandas Series, ...).
    y_predicted : sequence
        Predicted labels, in the same order as ``y_true``.

    Returns
    -------
    float
        Accuracy in percent (0-100). ``0.0`` is returned when ``y_true`` is
        empty instead of dividing by zero.

    Raises
    ------
    ValueError
        If ``y_predicted`` has fewer elements than ``y_true``.
    """
    # Materialise both inputs so they are paired by position. Indexing a
    # pandas Series as ``series[i]`` looks up the *label* ``i``, which raises
    # or pairs the wrong rows whenever the index is not 0..n-1.
    true_labels = list(y_true)
    predicted_labels = list(y_predicted)

    total = len(true_labels)
    if total == 0:
        return 0.0
    if len(predicted_labels) < total:
        raise ValueError(
            "y_predicted has fewer elements than y_true: "
            "{} < {}".format(len(predicted_labels), total)
        )

    matches = sum(
        1 for actual, predicted in zip(true_labels, predicted_labels)
        if actual == predicted
    )
    # float() keeps this a true division even under Python 2, where
    # int / int would truncate the ratio to 0 or 1.
    return (float(matches) / total) * 100
In [20]:
# Accuracy (in percent) of the k-NN predictions on the test set.
calculate_accuracy(y_true=y_test, y_predicted=y_predicted)
Out[20]: