In [1]:
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [2]:
knn = KNeighborsClassifier(n_neighbors=3, p=2)
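
With n_neighbors=3 each prediction is a majority vote among the three closest training points, and p=2 picks the Euclidean special case of the Minkowski metric. A minimal sketch of what that distance computes, with the two sample points invented purely for illustration:

In [ ]:
import numpy as np

# Two made-up 4-feature points, shaped like the Iris rows loaded below.
a = np.array([5.1, 3.5, 1.4, 0.2])
b = np.array([6.3, 3.3, 6.0, 2.5])

euclidean = np.sum(np.abs(a - b) ** 2) ** 0.5  # Minkowski with p=2
manhattan = np.sum(np.abs(a - b))              # p=1 would give this instead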

In [7]:
path_to_training_data = "../data/training.txt"
path_to_test_data = "../data/test.txt"

In [8]:
train = pd.read_csv(path_to_training_data, sep=",", header=None)
test = pd.read_csv(path_to_test_data, sep=",", header=None)
#train.columns = ["a", "b", "c", "d"]
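
Because the files have no header row, pandas assigns the integer column labels 0 through 4, which the rest of the notebook uses. If named columns were wanted instead (as the commented-out line above hints), the conventional Iris feature names could be assigned; the names here are an assumption about the file layout:

In [ ]:
# Assumed layout: four measurements followed by the species label.
cols = ["sepal_length", "sepal_width", "petal_length", "petal_width", "species"]
train.columns = cols
test.columns = cols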

In [9]:
test.iloc[:, 4]  # the fifth column holds the class label


Out[9]:
0     Iris-virginica
1     Iris-virginica
2    Iris-versicolor
3        Iris-setosa
4        Iris-setosa
5        Iris-setosa
Name: 4, dtype: object

In [14]:
x_train = train.iloc[:, :4]  # first four columns: the measurements
x_test = test.iloc[:, :4]

y_train = train.iloc[:, 4]   # fifth column: the species label
y_test = test.iloc[:, 4]
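
The first four columns are the flower measurements and the fifth is the species label. If the data arrived as a single file rather than pre-split, scikit-learn's train_test_split could produce the same kind of split; a sketch assuming one combined DataFrame df with the same column layout:

In [ ]:
from sklearn.model_selection import train_test_split

# `df` is hypothetical: a single DataFrame laid out like train/test above.
X = df.iloc[:, :4]
y = df.iloc[:, 4]
x_tr, x_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)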

In [16]:
knn.fit(x_train, y_train)


Out[16]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
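
For k-nearest neighbors, fit mostly just stores the training set; the distance work happens at prediction time. The fitted model can also report which stored points sit closest to a query, for example:

In [ ]:
# Distances to, and training-row indices of, the 3 nearest neighbors
# of the first test sample.
distances, indices = knn.kneighbors(x_test.iloc[:1])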

In [19]:
y_predicted = knn.predict(x_test)

In [18]:
def calculate_accuracy(y_true, y_predicted):
    # Count matching labels, then return the percentage correct.
    good = sum(1 for t, p in zip(y_true, y_predicted) if t == p)
    return (good / len(y_true)) * 100
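
scikit-learn ships equivalent helpers, so the loop above is mainly illustrative. Both of the following return accuracy as a fraction in [0, 1] rather than a percentage:

In [ ]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, y_predicted)  # fraction of correct predictions
knn.score(x_test, y_test)            # same metric, computed by the model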

In [20]:
calculate_accuracy(y_test, y_predicted)


Out[20]:
83.33333333333334
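
With six test rows, 83.33% accuracy means exactly one misclassification. To see which species was confused, a confusion matrix gives the per-class breakdown:

In [ ]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
confusion_matrix(y_test, y_predicted)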