Demo - Linear Regression with scikit-learn


In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn import model_selection
from sklearn import metrics

# Generate synthetic data: a straight line y = 20 + 3x plus Gaussian noise
# (standard deviation 80). The global seed is fixed so every run produces
# the same samples.
np.random.seed(5)
X = np.arange(0, 100)
y = 20 + 3 * X + np.random.normal(0, 80, 100)

# Hold out 33% of the samples for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33, random_state=42)

plt.scatter(X_train, y_train, color='green', label="training data")
plt.scatter(X_test, y_test, color='blue', label="test data")

# scikit-learn estimators take a 2-D feature matrix of shape
# (n_samples, n_features), so turn the 1-D inputs into single-column matrices.
X_train_2d = X_train.reshape(-1, 1)
X_test_2d = X_test.reshape(-1, 1)

# Create the linear regression model and fit it on the training split only.
model = linear_model.LinearRegression()
model.fit(X_train_2d, y_train)

# Predict on the held-out split.
y_pred = model.predict(X_test_2d)

# The test split is shuffled; sort by x so the fitted line is drawn left to right.
order = np.argsort(X_test)
plt.plot(X_test[order], y_pred[order], color='red', linewidth=3, linestyle='solid', label="model")
plt.legend()

plt.draw()

print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
print('R^2 score: %.2f' % metrics.r2_score(y_test, y_pred))
# coef_ is a 1-D array with one entry per feature; index the single slope
# explicitly rather than relying on implicit array-to-float conversion,
# which NumPy has deprecated for arrays with ndim > 0.
print('model: y(x) = %.2f + %.2f * x' % (model.intercept_, model.coef_[0]))


Mean squared error: 6253.29
R^2 score: 0.64
model: y(x) = 44.95 + 2.63 * x

In [2]:
# Generate synthetic data that is quadratic in x: y = x^2 plus Gaussian noise
# (standard deviation 500). The global seed is fixed so every run produces
# the same samples.
np.random.seed(5)
X = np.arange(0, 100)
y = np.power(X, 2) + np.random.normal(0, 500, 100)

# Hold out 33% of the samples for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.33, random_state=42)

plt.scatter(X_train, y_train, color='green', label="training data")
plt.scatter(X_test, y_test, color='blue', label="test data")

# scikit-learn estimators take a 2-D feature matrix of shape
# (n_samples, n_features), so turn the 1-D inputs into single-column matrices.
X_train_2d = X_train.reshape(-1, 1)
X_test_2d = X_test.reshape(-1, 1)

# Create the linear regression model and fit it on the training split only.
# Note: the only feature is x itself, so the model can only produce a straight
# line even though the data above were generated from x^2.
model = linear_model.LinearRegression()
model.fit(X_train_2d, y_train)

# Predict on the held-out split.
y_pred = model.predict(X_test_2d)

# The test split is shuffled; sort by x so the fitted line is drawn left to right.
order = np.argsort(X_test)
plt.plot(X_test[order], y_pred[order], color='red', linewidth=3, linestyle='solid', label="model")
plt.legend()

plt.draw()

print('Mean squared error: %.2f' % metrics.mean_squared_error(y_test, y_pred))
print('R^2 score: %.2f' % metrics.r2_score(y_test, y_pred))
# coef_ is a 1-D array with one entry per feature; index the single slope
# explicitly rather than relying on implicit array-to-float conversion,
# which NumPy has deprecated for arrays with ndim > 0.
print('model: y(x) = %.2f + %.2f * x' % (model.intercept_, model.coef_[0]))


Mean squared error: 593541.56
R^2 score: 0.93
model: y(x) = -1542.57 + 98.30 * x