notebook.community

Edit and run



In [1]:

    
from pml.api import *

# Load the student dataset. The path is relative, so it is resolved against the
# kernel's working directory rather than the notebook's location on disk.
data = load("../dataset_ext2.csv")
# The return value is discarded, so this call is relied on for its effect on
# `data` itself. Presumably it replaces each missing value with the mean of
# its feature (column) -- TODO confirm against the pml documentation.
data.fill_missing_with_feature_means()



In [2]:

    
# Count successful and probation students as one group (s).
# The labels "s" and "p" are both passed as sources and "s" as the target, so
# afterwards both are treated as the single class "s". The result is not
# assigned, so `data` is presumably modified in place -- TODO confirm.
data.combine_labels(["s", "p"], "s")



In [3]:

    
# Take a 50-50 split for training and testing.
# `using_labels=True` presumably keeps the label proportions the same in both
# halves (a stratified split) -- TODO confirm against the pml documentation.
# NOTE(review): no random seed is set anywhere in this notebook; if `split`
# shuffles the rows, the accuracies reported below will differ between runs.
training, testing = data.split(0.5, using_labels=True)



In [4]:

    
# Test accuracy of KNN for different k values (k = 3 .. 9).
# For every k, a Knn classifier is built from `training`, applied to all of
# `testing`, and the accuracy of that classification is printed as a percentage.
for k in range(3, 10):
    accuracy = Knn(training, k).classify_all(testing).compute_accuracy()
    # `accuracy` is scaled by 100 for display as a percentage.
    # The parenthesised single-argument form is valid both as a Python 2 print
    # statement and as a Python 3 print() call, and produces identical output;
    # the bare `print "..."` statement is a SyntaxError on Python 3.
    print("k=%d: %2.5f %%" % (k, 100 * accuracy))









    



k=3: 75.00000 %
k=4: 72.50000 %
k=5: 72.50000 %
k=6: 75.00000 %
k=7: 75.00000 %
k=8: 72.50000 %
k=9: 70.00000 %



In [ ]: