In [4]:
import sys

# Extend the module search path with two local checkouts so that later cells
# can import helper modules from them (presumably prep_terrain_data and
# ipython_picture live there -- TODO confirm).
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# not import its helpers on another machine unless they are adjusted.
sys.path.extend([
    '/home/aseem/projects/ud120-projects/choose_your_own',
    "/home/aseem/projects/website/udacity_data_science_notes/intro_machine_learning",
])
%matplotlib inline
def show_picture():
    """Render the picture that ``prettyPicture`` builds for the current classifier.

    Takes no arguments: it reads the notebook-level names ``clf``,
    ``features_test`` and ``labels_test``, so all three must already be
    defined when this is called.
    """
    # Imported at call time, so defining this helper does not require the
    # module to be importable yet.
    import ipython_picture

    # prettyPicture returns an object exposing .show() (presumably the pyplot
    # module, given the original local was named `plt` -- TODO confirm).
    figure_handle = ipython_picture.prettyPicture(clf, features_test, labels_test)
    figure_handle.show()
def show_accuracy():
    """Print the accuracy of the current classifier on the test data.

    Takes no arguments: it reads the notebook-level names ``clf``,
    ``features_test`` and ``labels_test``, so ``clf`` must already be fitted
    and the test arrays defined when this is called. Returns nothing; the
    score is written to stdout.
    """
    from sklearn.metrics import accuracy_score

    # accuracy_score is declared as (y_true, y_pred): ground truth goes first,
    # predictions second.
    acc = accuracy_score(labels_test, clf.predict(features_test))
    # The parenthesised single-argument form prints the same text under both
    # Python 2 and Python 3.
    print(acc)
In [5]:
from prep_terrain_data import makeTerrainData

# makeTerrainData() returns four values, unpacked here as train features,
# train labels, test features and test labels (in that order).
(features_train,
 labels_train,
 features_test,
 labels_test) = makeTerrainData()
In [13]:
from sklearn.neighbors import KNeighborsClassifier

# Build and train a 4-nearest-neighbours classifier. fit() hands back the
# estimator itself, so construction and training are chained into a single
# assignment to the notebook-level `clf`.
clf = KNeighborsClassifier(n_neighbors=4).fit(features_train, labels_train)

# Both helpers take no arguments and read the global `clf` assigned above.
show_picture()
show_accuracy()
In [59]:
try:
    # Current home of cross_val_score (scikit-learn 0.18 and later).
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Fallback for old installations that only ship the legacy module, which
    # was removed from scikit-learn in 0.20.
    from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import AdaBoostClassifier

# Sweep n_estimators over 1..199 and record the mean cross-validation score
# for each setting. Because the sweep starts at 1, all_scores[k] belongs to
# n_estimators == k + 1.
all_scores = []
# `range` (rather than the Python 2-only `xrange`) runs on both Python 2 and 3.
for i in range(1, 200):
    clf = AdaBoostClassifier(n_estimators=i)
    # cross_val_score returns one score per fold; keep only their mean.
    scores = cross_val_score(clf, features_train, labels_train)
    all_scores.append(scores.mean())
Let's plot the mean cross-validation accuracy obtained for each number of estimators.
In [60]:
%matplotlib inline
import matplotlib.pyplot as plt
y = all_scores
plt.plot(range(1, len(y) + 1), y)
plt.show()
In [67]:
# Locate the highest entry of all_scores (the first one, if several tie) and
# add 1 to its zero-based index to get the n_estimators value used here.
best_n_estimators = all_scores.index(max(all_scores)) + 1
clf = AdaBoostClassifier(n_estimators=best_n_estimators)

# Left as the cell's final bare expression so the notebook displays the mean
# cross-validation score as the cell output.
cross_val_score(clf, features_train, labels_train).mean()
Out[67]: