Decision Trees: AdaBoost with decision-tree base learners on MNIST


In [1]:
from PIL import Image
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from sklearn import datasets, tree
from sklearn import ensemble
# Notebook-wide plot settings: the 'bmh' style sheet and a (10, 7) default figure size.
plt.style.use('bmh')
plt.rcParams.update({'figure.figsize': (10, 7)})

MNIST


In [2]:
import gzip
import pickle

# Unpickle the three dataset splits stored in the gzip-compressed archive.
# NOTE: pickle.load can execute arbitrary code, so only open archives from a
# trusted source.
# encoding='latin1' is the setting the pickle documentation gives for NumPy
# arrays pickled by Python 2 — presumably how this file was written.
with gzip.open('../Week02/mnist.pkl.gz', 'rb') as mnist_file:
    splits = pickle.load(mnist_file, encoding='latin1')
train_set, validation_set, test_set = splits

# Each split is an (inputs, labels) pair; only the training and test splits
# are unpacked here.
(train_X, train_y), (test_X, test_y) = train_set, test_set

In [3]:
def downsample_by_two(flat_images, side=28):
    """Keep every second row and column of flattened ``side`` x ``side`` images.

    Parameters
    ----------
    flat_images : 2-D array
        Assumed to hold one flattened square image per row, i.e. shape
        (n_images, side * side) — TODO confirm against the loaded data.
    side : int, default 28
        Edge length of the full-resolution images.

    Returns
    -------
    2-D array
        The thinned images, flattened again, one per row. Input whose rows
        already have the reduced width is returned unchanged, so re-running
        the cell is a no-op.

    Raises
    ------
    ValueError
        If the row width matches neither the full nor the reduced image size.
    """
    reduced_side = (side + 1) // 2  # number of indices selected by ::2
    n_pixels = flat_images.shape[1]
    if n_pixels == reduced_side ** 2:
        # Already downsampled. Reshaping again with -1 would either fail or
        # silently regroup the pixels into fewer, scrambled images.
        return flat_images
    if n_pixels != side ** 2:
        raise ValueError(
            "expected rows of %d or %d pixels, got %d"
            % (side ** 2, reduced_side ** 2, n_pixels)
        )
    thinned = flat_images.reshape(-1, side, side)[:, ::2, ::2]
    return thinned.reshape(-1, reduced_side ** 2)


# Same variable names as before so the later cells keep working.
train_X = downsample_by_two(train_X)
test_X = downsample_by_two(test_X)

In [4]:
# Fixed seed so repeated runs of the notebook build the same ensemble.
RANDOM_SEED = 0

# AdaBoost over up to 50 decision trees, each limited to depth 10.
# NOTE(review): the explicit algorithm="SAMME.R" argument was dropped. Recent
# scikit-learn releases deprecate and then reject that value, while older
# releases use it as the default, so omitting it keeps the old behaviour there
# and still runs on newer installs — confirm against the installed version.
clf = ensemble.AdaBoostClassifier(
    tree.DecisionTreeClassifier(max_depth=10),
    n_estimators=50,
    learning_rate=1,
    random_state=RANDOM_SEED,
)

In [5]:
%%timeit -n 1 -r 1
clf.fit(train_X, train_y)


1 loop, best of 1: 1min 13s per loop

In [6]:
%%timeit -n 1 -r 1
print(np.mean(clf.predict(train_X) == train_y))


0.99172
1 loop, best of 1: 1.2 s per loop

In [7]:
%%timeit -n 1 -r 1
print(np.mean(clf.predict(test_X) == test_y))


0.9346
1 loop, best of 1: 228 ms per loop

In [ ]: