In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from rgf.sklearn import RGFRegressor
import matplotlib.pyplot as plt

In [2]:
# Load the Boston housing data and hold out 10% of the rows as a test set.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn -- confirm the pinned version.
boston = load_boston()
features, target = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.1,
    random_state=42,  # fixed seed so the split is repeatable
)

In [3]:
# Number of cross-validation folds used when scoring both models.
n_folds = 3

# Each model's hyper-parameters are grouped in a dict so they read as one unit
# and can be tweaked without touching the constructor call.
rgf_params = dict(
    max_leaf=300,
    algorithm="RGF_Sib",
    test_interval=100,
    loss="LS",
    verbose=False,
)
rgf = RGFRegressor(**rgf_params)

rf_params = dict(
    n_estimators=600,
    min_samples_leaf=3,
    max_depth=10,
    random_state=42,  # fixed seed so the forest is repeatable
)
rf = RandomForestRegressor(**rf_params)

In [4]:
# Score both models with the same metric and fold count on the training split.
# make_scorer(mean_squared_error) is used with its defaults, so the returned
# values are plain (positive) MSE per fold: lower is better.
cv_options = dict(scoring=make_scorer(mean_squared_error), cv=n_folds)

rgf_scores = cross_val_score(rgf, X_train, y_train, **cv_options)
rf_scores = cross_val_score(rf, X_train, y_train, **cv_options)

In [5]:
rgf_score = sum(rgf_scores)/n_folds
print('RGF Classfier MSE: {0:.5f}'.format(rgf_score))
rf_score = sum(rf_scores)/n_folds
print('Random Forest Classfier MSE: {0:.5f}'.format(rf_score))


RGF Classfier MSE: 12.59373
Random Forest Classfier MSE: 13.80435

In [6]:
# Fit each model on the whole training split, then predict the held-out rows.
rgf.fit(X_train, y_train)
y_pred_rgf = rgf.predict(X_test)

rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [7]:
%matplotlib inline

plt.figure(figsize=(9.5,5))
origin_plot = plt.scatter(X_test[:, 5], y_test, s=45, color="black")
rgf_plot = plt.scatter(X_test[:, 5], y_pred_rgf, s=45, color="red", alpha=0.6)
rf_plot = plt.scatter(X_test[:, 5], y_pred_rf, s=45, color="green", alpha=0.6)
plt.xlabel("Average number of rooms per dwelling")
plt.ylabel("Median value of owner-occupied homes in $1000's")
plt.legend([origin_plot, rgf_plot, rf_plot],
           ["Ground Truth", "RGF", "Random Forest"],
           loc="upper left")
plt.show()



In [ ]: