In [ ]:
import pandas as pd
import numpy as np
import pylab as pl
from sklearn.neighbors import KNeighborsClassifier
%matplotlib inline
In [ ]:
# Read the credit-card default data set from the CSV file on the Desktop.
ccdefault = pd.read_csv('~/Desktop/ccdefault.csv')
# Echo the column names as a plain list so the notebook displays them.
ccdefault.columns.tolist()
In [ ]:
# Keep every column except the first one.
# NOTE(review): the first column is presumably a row identifier -- confirm
# against the CSV before relying on this.
kept_columns = ccdefault.columns[1:]
ccd = ccdefault[kept_columns]
# Echo the remaining column names so the notebook displays them.
ccd.columns.tolist()
In [ ]:
# Random split: every row gets a uniform draw from [0, 1) and is flagged True
# when the draw is <= .333, i.e. for roughly one row in three.
# NOTE: despite its name, `test_idx` flags the rows that end up in `train`;
# the unflagged rows (about two thirds) form `test`.
test_idx = np.random.uniform(0, 1, len(ccd)) <= .333
train = ccd[test_idx]
test = ccd[~test_idx]
In [ ]:
# Preview the first five rows of the training split.
train.head(5)
In [ ]:
# Predictor columns fed to the classifier.
# The target column 'DEFAULT' is deliberately NOT listed here: using the label
# as an input leaks the answer to the model and makes the measured accuracy
# meaningless.
features = [
    'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE',
    'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6',
    'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
    'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
]
In [ ]:
# Fit a k-nearest-neighbours classifier on the training split and measure the
# percentage of correct predictions on the held-out split.
n = 5  # Neighbors
results = []  # not used in this cell; kept in case other cells rely on it

# Drop the label column from the predictors in case `features` contains it:
# feeding 'DEFAULT' to the model would leak the answer it is asked to predict.
predictors = [col for col in features if col != 'DEFAULT']

clf = KNeighborsClassifier(n_neighbors=n)
clf.fit(train[predictors], train['DEFAULT'])
preds = clf.predict(test[predictors])

# Share of test rows whose prediction equals the true label, as a percentage.
accuracy = (preds == test['DEFAULT']).mean() * 100
In [ ]:
# Report the neighbour count and the test accuracy with two decimal places.
# '%.2f' keeps the decimals (an integer conversion such as '%d' would silently
# truncate them), and the parenthesised form runs on both Python 2 and 3.
print("Neighbors: %d, Accuracy: %.2f%%" % (n, accuracy))
In [ ]: