notebook.community

Edit and run



In [70]:

    
# import
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


%matplotlib inline



In [37]:

    
# Read the seeds data file: fields are separated by '|' and there is no header row,
# so pandas labels the columns with integers starting at 0.
df = pd.read_csv("data/seeds_dataset.txt", sep='|', header=None)

# Preview the first three rows.
df.head(3)



In [65]:

    
# The first seven columns are used as features; column 7 is used as the target.
feature_cols = df.columns[:7]
X = df[feature_cols]
y = df[7]

# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=23,
)

Implementing a KNN classifier with K = 1



In [56]:

    
# Baseline: 1-nearest-neighbour classifier fitted on the training split
# and evaluated on the held-out test split.
clf = KNeighborsClassifier(n_neighbors=1)
y_pred = clf.fit(X_train, y_train).predict(X_test)

# Accuracy of the predictions against the true test labels.
scores = accuracy_score(y_test, y_pred)
print("Scores : ", scores)









    



Scores :  0.936507936508

Repeating the same evaluation with KFold cross-validation



In [69]:

    
# Same 1-NN model, now scored with shuffled 5-fold cross-validation over the full data.
clf = KNeighborsClassifier(n_neighbors=1)
kf = KFold(n_splits=5, shuffle=True, random_state=34)
print(kf)

# Rebind X and y as NumPy arrays so the integer index arrays yielded by
# kf.split() select rows positionally via X[idx] / y[idx].
X, y = np.array(X), np.array(y)

# Fit on each fold's training rows and record accuracy on its held-out rows.
scores = []
for train_idx, test_idx in kf.split(X):
    fold_model = clf.fit(X[train_idx], y[train_idx])
    fold_pred = fold_model.predict(X[test_idx])
    scores.append(accuracy_score(y[test_idx], fold_pred))

print("Scores: ", scores)
print("Mean Scores: ", np.mean(scores))









    



KFold(n_splits=5, random_state=34, shuffle=True)
Scores:  [0.95238095238095233, 0.8571428571428571, 0.90476190476190477, 0.90476190476190477, 0.8571428571428571]
Mean Scores:  0.895238095238

Standardizing the data and creating a pipeline



In [72]:

    
# Chain feature standardization and the 1-NN classifier into a single estimator.
# Because the scaler is a pipeline step, it is re-fit on each fold's training rows
# only, so no statistics from the held-out rows leak into the scaling.
clf = Pipeline([
    ('scale', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=1)),
])

# cross_val_score performs the fit / predict / score loop for every fold produced
# by `kf` (the same splitter used for the unscaled model, so the folds match).
# For a classifier its default scoring is accuracy, i.e. the same metric as
# accuracy_score. It accepts X as either a DataFrame or an ndarray, so this cell
# does not rely on X having been rebound to a NumPy array by an earlier cell.
# .tolist() gives a plain list of Python floats for printing.
scores = cross_val_score(clf, X, y, cv=kf).tolist()

print("Scores: ", scores)
print("Mean Scores: ", np.mean(scores))









    



Scores:  [0.9285714285714286, 0.9285714285714286, 0.9285714285714286, 0.97619047619047616, 0.90476190476190477]
Mean Scores:  0.933333333333



In [ ]:

	0	1	2	3	4	5	6	7
0	15.26	14.84	0.8710	5.763	3.312	2.221	5.220	1
1	14.88	14.57	0.8811	5.554	3.333	1.018	4.956	1
2	14.29	14.09	0.9050	5.291	3.337	2.699	4.825	1