In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn import model_selection
from sklearn import metrics
# generate some data
np.random.seed(5)
X = np.arange(0, 100)
y = 20 + 3 * X + np.random.normal(0, 80, 100)
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33, random_state=42)
plt.scatter(X_train, y_train, color='green', label="training data")
plt.scatter(X_test, y_test, color='blue', label="test data")
# Create xgboost regression model
model = xgb.XGBRegressor()
# Train the model using the training sets
model.fit([[x] for x in X_train], y_train)
y_pred = model.predict([[x] for x in X_test])
order = np.argsort(X_test)
plt.plot(np.array(X_test)[order], np.array(y_pred)[order], color='red', linewidth=3, linestyle='solid', label="model")
plt.legend()
plt.draw()
print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
print('R^2 score: %.2f' % metrics.r2_score(y_test, y_pred))
In [2]:
# Same pipeline as the linear cell, now on quadratic data y = x^2 + N(0, 500),
# to show the tree model tracking a nonlinear trend.
# NOTE(review): this cell duplicates the previous one except for the target;
# consider extracting a fit_and_plot(X, y) helper if more cases are added.
np.random.seed(5)
X = np.arange(0, 100)
y = np.power(X, 2) + np.random.normal(0, 500, 100)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.33, random_state=42)

plt.scatter(X_train, y_train, color='green', label="training data")
plt.scatter(X_test, y_test, color='blue', label="test data")

# XGBoost expects a 2-D feature matrix; reshape the 1-D splits once
# instead of rebuilding a list-of-lists for every fit/predict call.
model = xgb.XGBRegressor()
model.fit(X_train.reshape(-1, 1), y_train)
y_pred = model.predict(X_test.reshape(-1, 1))

# Sort by x so the prediction curve is drawn left-to-right, not in the
# shuffled order produced by train_test_split.
order = np.argsort(X_test)
plt.plot(X_test[order], np.asarray(y_pred)[order], color='red',
         linewidth=3, linestyle='solid', label="model")
plt.legend()
plt.show()

print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
print('R^2 score: %.2f' % metrics.r2_score(y_test, y_pred))