In [1]:
from pml.api import *
# Read the dataset from disk, then replace missing values with per-feature means.
dataset_path = "../dataset_ext2.csv"
data = load(dataset_path)
# NOTE(review): the return value is discarded, so this presumably imputes
# `data` in place -- confirm against the pml API.
data.fill_missing_with_feature_means()
In [2]:
# Treat successful ("s") and probation ("p") students as a single class,
# relabelled "s".
labels_to_merge = ["s", "p"]
merged_label = "s"
data.combine_labels(labels_to_merge, merged_label)
In [3]:
# Divide the data in half: one half for training, the other for testing.
# NOTE(review): using_labels=True presumably keeps the label proportions equal
# in both halves, and the split may be random (no seed is set here), so the
# accuracies below could vary between runs -- confirm against the pml API.
split_ratio = 0.5
training, testing = data.split(split_ratio, using_labels=True)
In [4]:
# Measure KNN accuracy on the held-out test set for each k in 3..9.
for k in range(3, 10):
    # Build a KNN classifier from the training set, classify every test
    # sample, and score the resulting predictions.
    accuracy = Knn(training, k).classify_all(testing).compute_accuracy()
    # accuracy is scaled by 100 for display as a percentage (presumably a
    # fraction in [0, 1] -- confirm). A single parenthesised argument makes
    # this line print identically under Python 2 and Python 3.
    print("k=%d: %2.5f %%" % (k, 100 * accuracy))
In [ ]: