In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from pyrallel.ensemble import EnsembleGrower
from pyrallel.ensemble import sub_ensemble
from sklearn.datasets import load_digits
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.cross_validation import train_test_split

In [2]:
# NOTE(review): IPython.parallel was split out into the separate `ipyparallel`
# package as of IPython 4.0 — confirm which one the target environment provides.
from IPython.parallel import Client

# Create the client first, then ask it for a load-balanced view.
cluster_client = Client()
lb = cluster_client.load_balanced_view()

# Size of the view (presumably the number of available engines; the recorded
# output is 4).
len(lb)


Out[2]:
4

In [3]:
# Load the digits dataset and hold out part of it for evaluation.
digits = load_digits()

# Seed the shuffle so that Restart & Run All produces the same train/test split
# (and therefore comparable scores) on every run. Outputs recorded before this
# change came from an unseeded split and may differ slightly.
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=SPLIT_SEED)

In [4]:
# Base model handed to the grower: a small 5-tree ExtraTrees ensemble.
# The recorded run ends with 500 trees after launching with n_estimators=100,
# so each launched unit appears to contribute one such 5-tree model — confirm
# against the pyrallel documentation.
base_model = ExtraTreesClassifier(n_estimators=5)
grower = EnsembleGrower(lb, base_model)

In [5]:
# Options for the run, gathered in one place so they are easy to tweak.
# NOTE(review): `folder` and `name` look like the location/prefix used for
# intermediate artifacts — confirm against pyrallel.
launch_options = dict(
    n_estimators=100,
    folder='digits',
    name="digits_trees",
)

# Kick off the work on the training data; the call's return value is shown as
# the cell output (a progress line in the recorded run).
grower.launch(X_train, y_train, **launch_options)


Out[5]:
Progress: 00% (000/100), elapsed: 0.455s

In [9]:
# Display the grower again to poll its state; in the recorded run its repr is
# a "Progress: ..." line with completed/total counts and elapsed time.
grower


Out[9]:
Progress: 100% (100/100), elapsed: 4.686s

In [10]:
# Presumably blocks until every launched task has finished — confirm against
# pyrallel. The returned value is displayed as the cell output.
grower.wait()


Out[10]:
Progress: 100% (100/100), elapsed: 4.686s

In [12]:
%time final_model = grower.aggregate_model()
print("number of trees: {}".format(final_model.n_estimators))


CPU times: user 2.84 s, sys: 304 ms, total: 3.14 s
Wall time: 3.25 s
number of trees: 500

In [13]:
# Evaluate the aggregated model on the held-out split.
score = final_model.score(X_test, y_test)

# Report the score to three decimal places.
score_message = "score: {:.3f}".format(score)
print(score_message)


score: 0.996

In [ ]: