Gradient boosting

This is a minimal but workable implementation of Gradient Boosting Decision Trees (GBDTs), using scikit-learn's DecisionTreeRegressor as the base learner.

Read more in this blog post.
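
In a nutshell, gradient boosting is treated here as gradient descent in the space of predictions: at each iteration the predictions on the training set are updated as

$$\hat{y}^{(m+1)} = \hat{y}^{(m)} - \eta \, \frac{\partial L}{\partial \hat{y}^{(m)}}$$

where $\eta$ is the learning rate and the negative gradient of the loss $L$ is approximated by a small regression tree; the final model is then simply the sum of the trees' predictions.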


In [1]:
import numpy as np
from scipy.special import expit as sigmoid
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [2]:
class BaseGradientBoosting(BaseEstimator):
    def __init__(self, n_iter, learning_rate, loss):
        self.loss = loss
        self.learning_rate = learning_rate
        self.n_iter = n_iter

    def fit(self, X, y):
        rng = np.random.RandomState(seed=0)

        n_samples = X.shape[0]
        # arbitrary random starting point; gradient descent will refine these predictions
        y_pred_train = rng.normal(size=n_samples)

        self.predictors = list()

        for m in range(self.n_iter):  # Gradient Descent

            negative_gradient = -self.loss.compute_gradients(y, y_pred_train)
            # each tree approximates the negative gradient, pre-scaled by the learning rate
            new_predictor = DecisionTreeRegressor(max_depth=3)
            new_predictor.fit(X, y=self.learning_rate * negative_gradient)
            y_pred_train += new_predictor.predict(X)

            self.predictors.append(new_predictor)  # save for predict()

    def predict(self, X):
        return sum(predictor.predict(X) for predictor in self.predictors)
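
A couple of design choices keep predict() trivial: the learning rate is folded into each tree's training targets in fit(), so the raw tree outputs can be summed without any rescaling, and the random values used to initialise y_pred_train are just a starting point for gradient descent, so they are not added back at prediction time.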

In [3]:
# GBDT for regression

class LeastSquaresLoss:
    def compute_gradients(self, y_true, y_pred):
        return -2 * (y_true - y_pred)
    

class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
    
    def __init__(self, n_iter=100, learning_rate=.1):
        super().__init__(n_iter, learning_rate, loss=LeastSquaresLoss())


X, y = make_regression(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
gb = GradientBoostingRegressor()
gb.fit(X_train, y_train)

print('r squared on training data: {:.3f}'.format(gb.score(X_train, y_train)))
print('r squared on test data: {:.3f}'.format(gb.score(X_test, y_test)))


r squared on training data: 0.997
r squared on test data: 0.937
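
As an optional sanity check, the same train/test split can be fed to scikit-learn's built-in gradient boosting regressor with matching hyper-parameters; the scores should land in the same ballpark. The import alias below is only there to avoid shadowing the toy GradientBoostingRegressor defined in this notebook.

# compare against scikit-learn's built-in implementation (optional sanity check)
from sklearn.ensemble import GradientBoostingRegressor as SklearnGBRegressor

sk_gb = SklearnGBRegressor(n_estimators=100, learning_rate=.1, max_depth=3, random_state=0)
sk_gb.fit(X_train, y_train)

print('sklearn r squared on training data: {:.3f}'.format(sk_gb.score(X_train, y_train)))
print('sklearn r squared on test data: {:.3f}'.format(sk_gb.score(X_test, y_test)))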

In [4]:
# GBDT for classification

class BinaryCrossEntropy:
    def compute_gradients(self, y_true, y_pred):
        return sigmoid(y_pred) - y_true

    def raw_predictions_to_proba(self, raw_predictions):
        return sigmoid(raw_predictions)


class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):

    def __init__(self, n_iter=100, learning_rate=.1):
        super().__init__(n_iter, learning_rate, loss=BinaryCrossEntropy())

    def predict(self, X):
        raw_predictions = super().predict(X)
        proba_positive_class = self.loss.raw_predictions_to_proba(raw_predictions)
        return proba_positive_class > .5

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)

print('accuracy on training data: {:.3f}'.format(gb.score(X_train, y_train)))
print('accuracy on test data: {:.3f}'.format(gb.score(X_test, y_test)))


accuracy on training data: 0.981
accuracy on test data: 0.956
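
The same kind of sanity check works for classification, again with matching hyper-parameters and an alias to avoid shadowing the GradientBoostingClassifier defined above.

# compare against scikit-learn's built-in implementation (optional sanity check)
from sklearn.ensemble import GradientBoostingClassifier as SklearnGBClassifier

sk_gb = SklearnGBClassifier(n_estimators=100, learning_rate=.1, max_depth=3, random_state=0)
sk_gb.fit(X_train, y_train)

print('sklearn accuracy on training data: {:.3f}'.format(sk_gb.score(X_train, y_train)))
print('sklearn accuracy on test data: {:.3f}'.format(sk_gb.score(X_test, y_test)))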