Demo - Regression with XGBoost


In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn import model_selection
from sklearn import metrics

# generate some data
np.random.seed(5)
X = np.arange(0, 100)
y = 20 + 3 * X + np.random.normal(0, 80, 100)

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33, random_state=42)

plt.scatter(X_train, y_train, color='green', label="training data")
plt.scatter(X_test, y_test, color='blue', label="test data")


# Create xgboost regression model
model = xgb.XGBRegressor()

# Train the model using the training sets
model.fit([[x] for x in X_train], y_train)

y_pred = model.predict([[x] for x in X_test])


order = np.argsort(X_test)
plt.plot(np.array(X_test)[order], np.array(y_pred)[order], color='red', linewidth=3, linestyle='solid', label="model")
plt.legend()

plt.draw()

print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
print('R^2 score: %.2f' % metrics.r2_score(y_test, y_pred))


Mean squared error: 8520.88
R^2 score: 0.50

In [2]:
# Same pipeline on a nonlinear target: y = x^2 + Gaussian noise (sigma=500).
np.random.seed(5)
X = np.arange(0, 100)
y = np.power(X, 2) + np.random.normal(0, 500, 100)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.33, random_state=42)

plt.scatter(X_train, y_train, color='green', label="training data")
plt.scatter(X_test, y_test, color='blue', label="test data")

model = xgb.XGBRegressor()

# Train the model using the training sets.
# Reshape the 1-D arrays into the (n_samples, 1) matrix the sklearn
# API expects, rather than building nested Python lists.
model.fit(X_train.reshape(-1, 1), y_train)

y_pred = model.predict(X_test.reshape(-1, 1))


# Sort by x so the prediction curve draws left-to-right.
order = np.argsort(X_test)
plt.plot(X_test[order], y_pred[order], color='red', linewidth=3,
         linestyle='solid', label="model")
plt.legend()

plt.show()

print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
print('R^2 score: %.2f' % metrics.r2_score(y_test, y_pred))


Mean squared error: 339237.44
R^2 score: 0.96