In [4]:
import sys
# Extend the module search path so that later cells can import helper modules
# from these two directories.
# NOTE(review): both entries are absolute paths on one machine; presumably they
# contain prep_terrain_data and ipython_picture, which later cells import --
# confirm and adjust before running this notebook anywhere else.
sys.path.append('/home/aseem/projects/ud120-projects/choose_your_own')
sys.path.append("/home/aseem/projects/website/udacity_data_science_notes/intro_machine_learning")

%matplotlib inline

def show_picture():
    """Display the picture prettyPicture builds for the current classifier.

    Takes no arguments: it reads the notebook-level names ``clf``,
    ``features_test`` and ``labels_test``, passes them to ``prettyPicture``
    and calls ``show()`` on the object that comes back.
    """
    # Imported inside the function, so the helper module is only looked up
    # when a picture is actually requested.
    from ipython_picture import prettyPicture

    prettyPicture(clf, features_test, labels_test).show()
    
def show_accuracy():
    """Print the accuracy of the current classifier on the test split.

    Takes no arguments: it reads the notebook-level names ``clf``,
    ``features_test`` and ``labels_test``. The score is printed, not returned.
    """
    from sklearn.metrics import accuracy_score

    predictions = clf.predict(features_test)
    # accuracy_score is declared as (y_true, y_pred): pass the ground-truth
    # labels first and the predictions second, matching the documented order.
    acc = accuracy_score(labels_test, predictions)
    # A parenthesised single-argument print behaves the same as a Python 2
    # print statement and is also valid as a Python 3 function call.
    print(acc)

Lesson 04 - Choose your own algorithm


In [5]:
from prep_terrain_data import makeTerrainData

# makeTerrainData() is unpacked into four notebook-level names, in this order:
# training features, training labels, test features, test labels.
(features_train,
 labels_train,
 features_test,
 labels_test) = makeTerrainData()

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# Create the 4-nearest-neighbours model and train it on the training split in
# a single expression; fit() hands back the fitted estimator, which becomes the
# notebook-level `clf` read by the helper functions below.
clf = KNeighborsClassifier(n_neighbors=4).fit(features_train, labels_train)

# Picture first, then the accuracy figure, so the cell output keeps its order.
show_picture()
show_accuracy()


0.94

In [59]:
# cross_val_score lives in sklearn.model_selection in current scikit-learn;
# the older sklearn.cross_validation module is kept as a fallback so the cell
# still runs on the release this notebook was originally written against.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import AdaBoostClassifier

# all_scores[k] holds the mean cross-validation score obtained on the training
# split with n_estimators = k + 1.
all_scores = []
# range (rather than the Python-2-only xrange) keeps the cell runnable on both
# Python 2 and Python 3; 199 integers are cheap to materialise either way.
for i in range(1, 200):
    # NOTE: this rebinds the notebook-level `clf` on every iteration; after the
    # loop it refers to the last AdaBoostClassifier created here.
    clf = AdaBoostClassifier(n_estimators=i)
    scores = cross_val_score(clf, features_train, labels_train)
    all_scores.append(scores.mean())

Let's plot the cross-validated accuracy against the number of estimators.


In [60]:
%matplotlib inline

import matplotlib.pyplot as plt

y = all_scores

plt.plot(range(1, len(y) + 1), y)
plt.show()



In [67]:
# Index of the first highest mean score. The list is zero-based while the
# estimator counts it was built from start at 1, hence the + 1.
best_n_estimators = all_scores.index(max(all_scores)) + 1
clf = AdaBoostClassifier(n_estimators=best_n_estimators)

# Kept as the cell's final bare expression so the notebook displays the value.
cross_val_score(clf, features_train, labels_train).mean()


Out[67]:
0.96134892824951867