In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from rgf.sklearn import RGFRegressor
import matplotlib.pyplot as plt
In [2]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases; confirm the pinned version before running on a fresh kernel.
boston = load_boston()

# Hold out 10% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    random_state=42,
    test_size=0.1,
)
In [3]:
# Number of cross-validation folds used when scoring both models.
n_folds = 3

# Regularized Greedy Forest regressor under evaluation.
rgf = RGFRegressor(
    algorithm="RGF_Sib",
    loss="LS",
    max_leaf=300,
    test_interval=100,
    verbose=False,
)

# Random forest baseline; seeded so results are reproducible.
rf = RandomForestRegressor(
    n_estimators=600,
    max_depth=10,
    min_samples_leaf=3,
    random_state=42,
)
In [4]:
# Cross-validate both models on the training split only, scoring each fold
# with mean squared error (lower is better).
rgf_scores = cross_val_score(
    estimator=rgf,
    X=X_train,
    y=y_train,
    cv=n_folds,
    scoring=make_scorer(mean_squared_error),
)
rf_scores = cross_val_score(
    estimator=rf,
    X=X_train,
    y=y_train,
    cv=n_folds,
    scoring=make_scorer(mean_squared_error),
)
In [5]:
# Average the per-fold MSE values for each model. Taking the mean of the score
# array itself keeps the result correct even if the `cv` setting and `n_folds`
# ever drift apart.
rgf_score = rgf_scores.mean()
rf_score = rf_scores.mean()

# Both estimators are regressors, so label them as such in the report.
print('RGF Regressor MSE: {0:.5f}'.format(rgf_score))
print('Random Forest Regressor MSE: {0:.5f}'.format(rf_score))
In [6]:
# Fit each model on the full training split, then predict the held-out test set.
rgf.fit(X_train, y_train)
y_pred_rgf = rgf.predict(X_test)

rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
In [7]:
%matplotlib inline
plt.figure(figsize=(9.5,5))
origin_plot = plt.scatter(X_test[:, 5], y_test, s=45, color="black")
rgf_plot = plt.scatter(X_test[:, 5], y_pred_rgf, s=45, color="red", alpha=0.6)
rf_plot = plt.scatter(X_test[:, 5], y_pred_rf, s=45, color="green", alpha=0.6)
plt.xlabel("Average number of rooms per dwelling")
plt.ylabel("Median value of owner-occupied homes in $1000's")
plt.legend([origin_plot, rgf_plot, rf_plot],
["Ground Truth", "RGF", "Random Forest"],
loc="upper left")
plt.show()
In [ ]: