In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier

try:
    # Current scikit-learn: splitting and learning-curve helpers live in
    # sklearn.model_selection.
    from sklearn.model_selection import learning_curve, train_test_split
except ImportError:
    # Fallback for old scikit-learn releases that predate sklearn.model_selection.
    from sklearn.cross_validation import train_test_split
    from sklearn.learning_curve import learning_curve
In [2]:
# Load the Iris dataset: X is the feature matrix, Y the class labels.
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# Hold out a test set using train_test_split's default split size.
# random_state is fixed so the split -- and every score derived from it --
# is the same after a kernel restart.
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0)
In [4]:
# k-nearest-neighbours classifier; n_neighbors=5 is the library default, spelled out here.
model = KNeighborsClassifier(n_neighbors=5)
In [5]:
# Train on the training split only; the cell displays whatever fit() returns.
model.fit(X=x_train, y=y_train)
Out[5]:
In [6]:
# Predicted class labels for the held-out test samples.
y_hat = model.predict(X=x_test)
In [26]:
# Confusion matrix over the test split: rows are true classes, columns are
# predicted classes, both in the label order 0, 1, 2.
class_labels = [0, 1, 2]
conf_matrix = confusion_matrix(y_test, y_hat, labels=class_labels)

# Draw the matrix as a heatmap that can be read on its own.
fig, ax = plt.subplots()
heatmap = ax.imshow(conf_matrix, interpolation="nearest")
ax.set_xticks(class_labels)
ax.set_yticks(class_labels)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix (test split)")
fig.colorbar(heatmap, ax=ax)

# Fraction (0-1) of test samples classified correctly. The value depends on
# the train/test split; an earlier run of this notebook recorded about 0.947.
accuracy_score(y_test, y_hat)
Out[26]:
In [9]:
# The Iris samples are stored grouped by class, and learning_curve takes its
# smaller training subsets from the leading samples of each training fold.
# Without shuffling, the small-size points would be fitted on only one or two
# classes. Permute the samples once, with a fixed seed for reproducibility,
# before computing the 5-fold learning curve.
shuffle_order = np.random.RandomState(0).permutation(len(Y))
train_sizes, train_scores, test_scores = learning_curve(
    model, X[shuffle_order], Y[shuffle_order], cv=5
)
In [30]:
# Average each score matrix over its second axis (axis=1) so there is one
# value per training-set size, then plot both curves on shared, labelled axes.
mean_train_score = np.mean(train_scores, axis=1)
mean_test_score = np.mean(test_scores, axis=1)

fig, ax = plt.subplots()
ax.plot(train_sizes, mean_train_score, 'o-', label="Training score")
ax.plot(train_sizes, mean_test_score, 'o-', label="Testing score")
ax.set_xlabel("Number of training samples")
ax.set_ylabel("Mean score")
ax.set_title("Learning curve")
ax.legend(loc="lower right")
Out[30]: