In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from lolopy.learners import RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor as SKRFRegressor
from sklearn.datasets import load_boston
import numpy as np
In [2]:
# Passing the flag positionally was deprecated by scikit-learn; use the
# explicit keyword form. return_X_y=True unpacks directly into (data, target).
# NOTE(review): load_boston itself was removed in scikit-learn 1.2 for ethical
# concerns — on modern sklearn this cell needs a replacement dataset
# (e.g. fetch_california_housing); confirm the target sklearn version.
X, y = load_boston(return_X_y=True)
print('Training set size:', X.shape)
In [3]:
model = SKRFRegressor(n_estimators=len(X))
In [4]:
%%time
# Train the scikit-learn forest; %%time reports the wall-clock cost so the
# two libraries' training times can be compared.
model.fit(X, y)
Out[4]:
In [5]:
%%time
# Predict on the training set itself (this demo compares the two libraries'
# outputs, not generalization error).
sk_pred = model.predict(X)
In [6]:
model = RandomForestRegressor(num_trees=len(X))
In [7]:
%%time
# Train the lolopy forest on the identical data for a like-for-like timing.
model.fit(X, y)
Out[7]:
In [8]:
%%time
# Unlike sklearn's RandomForestRegressor, lolopy's predict can also return a
# per-point uncertainty estimate via return_std=True.
lolo_pred, lolo_std = model.predict(X, return_std=True)
Note that lolopy's `RandomForestRegressor` follows the same `fit`/`predict` API as the scikit-learn model.
In [9]:
# Side-by-side parity plots: lolopy predictions (with error bars) on the left,
# scikit-learn predictions on the right, sharing the y axis.
fig, axs = plt.subplots(1, 2, sharey=True)

axs[0].errorbar(y, lolo_pred, lolo_std, fmt='o', ms=2.5, ecolor='gray')
axs[1].scatter(y, sk_pred, s=5)

lim = [0, 55]
for panel, label in zip(axs, ['Lolo', 'sklearn']):
    panel.set_xlim(lim)
    panel.set_ylim(lim)
    panel.set_xlabel('House Price, True (k$)')
    panel.plot(lim, lim, 'k--')  # y = x reference line: perfect predictions
    panel.text(5, 50, label, fontsize=16)

axs[0].set_ylabel('House Price, Predicted (k$)')
fig.set_size_inches(6, 3)
fig.tight_layout()
Lolo produces a random forest model whose predictions closely match scikit-learn's, and it additionally provides per-prediction uncertainty estimates (error bars).
In [ ]: