In [1]:
from pml.api import *

# Read the dataset from a CSV file one directory above the notebook.
dataset_path = "../dataset_ext2.csv"
data = load(dataset_path)

# Impute missing values before any further processing.
# NOTE(review): the return value is discarded, so this presumably updates
# `data` in place using per-feature means -- confirm against the pml docs.
data.fill_missing_with_feature_means()

In [2]:
# Treat successful ("s") and probation ("p") students as a single class by
# folding both labels into "s".
labels_to_merge = ["s", "p"]
data.combine_labels(labels_to_merge, "s")

In [3]:
# Divide the data in half: one part to train on, the other to test on.
# NOTE(review): using_labels=True presumably balances the labels across the
# two halves, and the split may or may not be randomised -- confirm against
# the pml docs if run-to-run reproducibility matters.
split_halves = data.split(0.5, using_labels=True)
training, testing = split_halves

In [4]:
# Compare KNN accuracy on the testing half for each k from 3 to 9 inclusive.
for k in range(3, 10):
    # Build a classifier from the training half with k neighbours, classify
    # every testing sample, and score the resulting classifications.
    accuracy = Knn(training, k).classify_all(testing).compute_accuracy()
    # accuracy is scaled by 100 for display as a percentage. The single
    # parenthesised argument prints the same text under the Python 2 print
    # statement and the Python 3 print() function.
    print("k=%d: %2.5f %%" % (k, 100 * accuracy))


k=3: 75.00000 %
k=4: 72.50000 %
k=5: 72.50000 %
k=6: 75.00000 %
k=7: 75.00000 %
k=8: 72.50000 %
k=9: 70.00000 %

In [ ]: