In [7]:
import sklearn
from sklearn import ensemble, datasets, cross_validation, metrics
In [8]:
# Load the Boston housing dataset and display the shape of its feature matrix.
# NOTE(review): `boston` is not referenced by any later cell visible here --
# confirm it is still needed before keeping this cell.
boston = datasets.load_boston()
boston.data.shape
Out[8]:
In [8]:
In [9]:
# Generate the synthetic Friedman #1 regression problem: 10000 samples,
# 20 features, noise level 1. The result is an (X, y) pair.
# random_state is pinned so that a fresh "Restart & Run All" regenerates
# exactly the same data set.
friedman1 = datasets.make_friedman1(n_samples=10000, n_features=20, noise=1., random_state=0)
# Display the shape of the feature matrix as the cell output.
friedman1[0].shape
Out[9]:
In [10]:
# Unpack the generated (features, targets) pair.
X, y = friedman1
# Use 70% of the samples for training and keep the rest for validation.
# random_state is pinned so the split is identical on every run, which keeps
# the scores of the models trained below comparable with each other.
X_train, X_valid, y_train, y_valid = cross_validation.train_test_split(
    X, y, train_size=0.7, random_state=0)
In [11]:
# Sanity-check the size of the training split produced by train_test_split.
X_train.shape
Out[11]:
In [12]:
def regression_report(y_true, y_pred):
    """Print the explained-variance score of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true : ground-truth target values.
    y_pred : predicted target values, aligned with ``y_true``.

    Returns
    -------
    None. The score is written to stdout on a single tab-indented line.
    """
    # Use the public ``metrics`` entry point rather than reaching into the
    # ``metrics.regression`` submodule.
    score = metrics.explained_variance_score(y_true, y_pred)
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('\t explained variance: %s' % (score,))
In [27]:
# Baseline: gradient boosting where every stage uses the same tree settings
# (max_depth=6, min_samples_leaf=20, min_samples_split=40).
# NOTE(review): `tree_params_producer` is not an upstream scikit-learn
# parameter; this cell presumably targets a patched build -- confirm.
model_gbc = ensemble.GradientBoostingRegressor(
    alpha=0.9,
    init=None,
    learning_rate=0.1,
    loss='ls',
    max_depth=6,
    max_features=None,
    min_samples_leaf=20,
    min_samples_split=40,
    min_weight_fraction_leaf=0.0,
    n_estimators=70,
    # subsample < 1 makes the fit stochastic; pin the seed so the reported
    # scores are reproducible across runs.
    random_state=0,
    subsample=0.8,
    tree_params_producer=None,
    verbose=1,
    warm_start=False)
model_gbc.fit(X_train, y_train)

# Report the fit quality on the training split and on the held-out split.
# Single-argument print calls behave identically under Python 2 and Python 3.
for split_label, X_split, y_split in (('Train:', X_train, y_train),
                                      ('Test:', X_valid, y_valid)):
    print(' ')
    print(split_label)
    regression_report(y_split, model_gbc.predict(X_split))
In [28]:
def linear_variable(vfrom, vto, N):
    """Build a schedule that ramps linearly from ``vfrom`` towards ``vto``.

    The returned callable maps an index ``i`` to
    ``int(i / N * (vto - vfrom) + vfrom)``, computed with float division.
    The fractional part of the interpolated value is discarded by ``int``.

    Parameters
    ----------
    vfrom : value of the schedule at ``i == 0``.
    vto : value the schedule heads towards as ``i`` approaches ``N``.
    N : number of steps over which the ramp is spread.
    """
    def schedule(i):
        # Fraction of the ramp covered so far; `* 1.` forces float division.
        progress = i * 1. / N
        return int(progress * (vto - vfrom) + vfrom)

    return schedule
def tree_params_producer_variable_depth(depth_from, depth_to, n_estimators):
    """Return a per-stage tree-parameter factory with a linearly varying depth.

    The result is a callable that takes a ``stage`` and returns a dict of
    tree parameters. ``max_depth`` is taken from
    ``linear_variable(depth_from, depth_to, n_estimators)`` evaluated at that
    stage; every other entry is a fixed value.
    """
    depth_at = linear_variable(depth_from, depth_to, n_estimators)

    def produce(stage):
        # Only max_depth depends on the stage; the remaining entries are constants.
        return dict(
            max_depth=depth_at(stage),
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.0,
            max_features=None,
            max_leaf_nodes=None,
        )

    return produce
# Variable-depth run: tree parameters come from a producer whose max_depth
# ramps from 1 towards 10 across the boosting stages.
# NOTE(review): `tree_params_producer` is not an upstream scikit-learn
# parameter; it is presumably consumed once per boosting stage by a patched
# estimator -- confirm.

# Single source of truth for the number of stages, shared by the estimator
# and the depth schedule so the two cannot drift apart.
n_stages = 80

model_gbc = ensemble.GradientBoostingRegressor(
    alpha=0.9,
    init=None,
    learning_rate=0.1,
    loss='ls',
    # max_depth, max_features, max_leaf_nodes, min_samples_leaf,
    # min_samples_split and min_weight_fraction_leaf are deliberately not
    # passed here: the producer below returns them.
    n_estimators=n_stages,
    # subsample < 1 makes the fit stochastic; pin the seed so the reported
    # scores are reproducible across runs.
    random_state=0,
    subsample=0.8,
    tree_params_producer=tree_params_producer_variable_depth(1, 10, n_stages),
    verbose=1,
    warm_start=False)
model_gbc.fit(X_train, y_train)

# Report the fit quality on the training split and on the held-out split.
# Single-argument print calls behave identically under Python 2 and Python 3.
for split_label, X_split, y_split in (('Train:', X_train, y_train),
                                      ('Test:', X_valid, y_valid)):
    print(' ')
    print(split_label)
    regression_report(y_split, model_gbc.predict(X_split))
In [8]:
In [18]:
# Spot-check the schedule: value at index 30 of a ramp from 1 towards 8 spread over 70 steps.
linear_variable(1, 8, 70)(30)
Out[18]:
In [ ]: