In [7]:
import sklearn
from sklearn import ensemble, datasets, cross_validation, metrics

In [8]:
boston = datasets.load_boston()   # loaded as a quick check only; the experiments below use Friedman #1 data
boston.data.shape


Out[8]:
(506, 13)

In [9]:
friedman1 = datasets.make_friedman1(n_samples=10000, n_features=20, noise=1.)   # (X, y) tuple
friedman1[0].shape


Out[9]:
(10000, 20)

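make_friedman1 generates the standard Friedman #1 benchmark: the target depends only on the first five of the 20 features, y = 10*sin(pi*x1*x2) + 20*(x3 - 0.5)^2 + 10*x4 + 5*x5 plus Gaussian noise with the given standard deviation, so the remaining 15 features are pure distractors. A minimal sanity-check sketch (sample size and random_state here are arbitrary, not taken from the run above):

import numpy as np

X_f, y_f = datasets.make_friedman1(n_samples=1000, n_features=20, noise=1., random_state=0)
y_clean = (10 * np.sin(np.pi * X_f[:, 0] * X_f[:, 1]) + 20 * (X_f[:, 2] - 0.5) ** 2
           + 10 * X_f[:, 3] + 5 * X_f[:, 4])
print(np.corrcoef(y_clean, y_f)[0, 1])   # close to 1: only unit-variance noise separates y from the clean signal
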
In [10]:
X = friedman1[0]
y = friedman1[1]
X_train, X_valid, y_train, y_valid = cross_validation.train_test_split(X, y, train_size=0.7)

In [11]:
X_train.shape


Out[11]:
(7000, 20)

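Aside: cross_validation is the pre-0.18 module name; on a current scikit-learn the same split comes from model_selection. A sketch (random_state added only for reproducibility):

from sklearn import model_selection
X_train, X_valid, y_train, y_valid = model_selection.train_test_split(X, y, train_size=0.7, random_state=0)
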
In [12]:
def regression_report(y_true, y_pred):
    # explained variance score: 1.0 means the predictions capture all of the target's variance
    print '\t explained variance:', metrics.explained_variance_score(y_true, y_pred)

In [27]:
# baseline run: fixed tree depth (max_depth=6) for all 70 boosting stages;
# tree_params_producer is left at None here (it is exercised in the next cell)
model_gbc = ensemble.GradientBoostingRegressor(alpha=0.9, init=None, learning_rate=0.1, loss='ls',
             max_depth=6,
             max_features=None,
             #max_leaf_nodes=15,
             min_samples_leaf=20, min_samples_split=40,
             min_weight_fraction_leaf=0.0, n_estimators=70,
             random_state=None, subsample=0.8, tree_params_producer=None,
             verbose=1, warm_start=False)

model_gbc.fit(X_train, y_train)


print ' '
print 'Train:'
regression_report(y_train, model_gbc.predict(X_train))
print ' '
print 'Test:'
regression_report(y_valid, model_gbc.predict(X_valid))


      Iter       Train Loss      OOB Improve   Remaining Time 
         1          21.1862           3.4779            4.98s
         2          17.9907           3.0230            4.52s
         3          15.6662           2.4718            4.52s
         4          13.5447           1.9948            4.38s
         5          11.9471           1.5724            4.30s
         6          10.3941           1.3790            4.19s
         7           9.2391           1.1929            4.08s
         8           8.1028           0.9529            4.01s
         9           7.2339           0.7755            3.93s
        10           6.4155           0.7200            3.84s
        20           2.6969           0.1517            3.15s
        30           1.5622           0.0398            2.46s
        40           1.1068           0.0085            1.84s
        50           0.9194           0.0007            1.21s
        60           0.8084           0.0008            0.60s
        70           0.7397          -0.0002            0.00s
 
Train:
	 explained variance: 0.970261473808
 
Test:
	 explained variance: 0.944339772202

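The verbose trace above reports only the train-side loss and the out-of-bag improvement from subsampling. To watch the validation score evolve stage by stage, staged_predict from the regular gradient boosting API can be used with the model fitted above; a minimal sketch:

for stage, y_pred in enumerate(model_gbc.staged_predict(X_valid), start=1):
    if stage % 10 == 0:
        print('stage %2d  valid explained variance: %.4f'
              % (stage, metrics.explained_variance_score(y_valid, y_pred)))
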
In [28]:
def linear_variable(vfrom, vto, N):
    # linear ramp: maps stage i in [0, N) to an int moving from vfrom towards vto
    return lambda i: int(i * 1. / N * (vto - vfrom) + vfrom)


def tree_params_producer_variable_depth(depth_from, depth_to, n_estimators):
    # per-stage tree parameters: max_depth grows linearly with the stage number,
    # the remaining parameters stay at the decision tree defaults
    variable_depth_foo = linear_variable(depth_from, depth_to, n_estimators)
    return lambda stage: {
            'max_depth': variable_depth_foo(stage),
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'min_weight_fraction_leaf': 0.0,
            'max_features': None,
            'max_leaf_nodes': None}

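# illustrative check of the schedule: one parameter dict per boosting stage,
# with max_depth ramping up linearly (printing the depth every 10 stages)
schedule = tree_params_producer_variable_depth(1, 10, 80)
print([schedule(stage)['max_depth'] for stage in range(0, 80, 10)])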

# variable-depth run: tree depth ramps linearly from 1 to 10 over the 80 stages
model_gbc = ensemble.GradientBoostingRegressor(alpha=0.9, init=None, learning_rate=0.1, loss='ls',
             #max_depth=8,
             #max_features=None,
             #max_leaf_nodes=15,
             #min_samples_leaf=20, min_samples_split=40,
             #min_weight_fraction_leaf=0.0,
             n_estimators=80,
             random_state=None,
             subsample=0.8,
             tree_params_producer=tree_params_producer_variable_depth(1, 10, 80),
             verbose=1, warm_start=False)

model_gbc.fit(X_train, y_train)


print ' '
print 'Train:'
regression_report(y_train, model_gbc.predict(X_train))
print ' '
print 'Test:'
regression_report(y_valid, model_gbc.predict(X_valid))


      Iter       Train Loss      OOB Improve   Remaining Time 
         1          23.8465           1.2325            1.66s
         2          22.4922           1.0192            1.18s
         3          21.9183           0.8435            1.00s
         4          21.1896           0.6217            0.92s
         5          19.9862           0.7378            0.85s
         6          19.5505           0.6971            0.83s
         7          19.1929           0.6270            0.81s
         8          18.5405           0.5842            0.78s
         9          17.8962           0.5519            0.77s
        10          17.1822           0.9675            0.82s
        20          10.0114           0.5996            0.98s
        30           5.4769           0.3543            1.06s
        40           3.1294           0.1355            1.05s
        50           1.8813           0.0599            0.96s
        60           1.2367           0.0318            0.78s
        70           0.8239           0.0190            0.47s
        80           0.5909           0.0013            0.00s
 
Train:
	 explained variance: 0.976398815713
 
Test:
	 explained variance: 0.946019133501

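tree_params_producer is not an argument of the stock scikit-learn GradientBoostingRegressor, so the runs above appear to rely on a patched estimator. With an unmodified scikit-learn, a rough approximation of the same shallow-trees-first idea is to grow the ensemble in chunks with warm_start=True and raise max_depth between chunks; a sketch (chunk size and ramp are illustrative, and the granularity is per chunk rather than per stage):

est = ensemble.GradientBoostingRegressor(learning_rate=0.1, subsample=0.8,
                                         n_estimators=10, max_depth=1, warm_start=True)
est.fit(X_train, y_train)                              # first 10 shallow trees
for chunk_end in range(20, 81, 10):                    # then 10 more trees per chunk
    depth = int(chunk_end / 80. * (10 - 1) + 1)        # linear ramp from 1 towards 10, as above
    est.set_params(n_estimators=chunk_end, max_depth=depth)
    est.fit(X_train, y_train)                          # warm_start: only the new trees are fitted
print('valid explained variance: %.4f'
      % metrics.explained_variance_score(y_valid, est.predict(X_valid)))
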
In [18]:
# note: the output below does not match the In [28] definition above (which gives a
# positive value here); it was presumably recorded against an earlier version of linear_variable
linear_variable(1, 8, 70)(30)


Out[18]:
-2
