In [39]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# sklearn.cross_validation was removed; cross_val_score now lives in model_selection
from sklearn.model_selection import cross_val_score

plt.rcParams['figure.figsize'] = (15, 9)  # set after pyplot has been imported
In [40]:
np.random.seed(0)
n_samples = 30
true_fn = lambda X: np.cos(1.5 * np.pi * X)          # underlying signal
X = np.sort(np.random.rand(n_samples))               # sample locations in [0, 1]
y = true_fn(X) + np.random.randn(n_samples) * 0.1    # noisy observations of the signal
continuous_X = np.linspace(0, 1, 500)                # dense grid for plotting the signal
plt.plot(continuous_X, true_fn(continuous_X),
         c='aquamarine', label='Signal')
plt.scatter(X, y, label='Samples')
print('Signal and noisy samples taken')
plt.legend()
plt.show()
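The noise added to the samples has standard deviation 0.1, so the irreducible error floor is roughly 0.01. A quick sanity check of that floor, as a small sketch reusing X, y and true_fn from the cell above:

noise_mse = np.mean((y - true_fn(X)) ** 2)   # expected to be close to 0.1 ** 2 = 0.01
print('Estimated irreducible (noise) MSE: {:.4f}'.format(noise_mse))

The cross-validated errors computed below can be compared against this floor.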
In [41]:
mses = []
stds = []
min_mse = np.inf
best_model = None
pipelines = []
degrees = range(1, 16)
for d in degrees:
    poly_features = PolynomialFeatures(degree=d, include_bias=False)
    model = LinearRegression()
    pipeline = Pipeline([('polynomial_features', poly_features),
                         ('linear_regression', model)])
    pipeline.fit(X[:, np.newaxis], y)
    # cross_val_score returns negated MSEs with this scorer, so flip the sign
    scores = cross_val_score(pipeline, X[:, np.newaxis], y,
                             scoring='neg_mean_squared_error', cv=10)
    mses.append(-scores.mean())
    stds.append(scores.std())
    pipelines.append(pipeline)
    if -scores.mean() < min_mse:
        min_mse = -scores.mean()
        best_model = pipeline
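The loop keeps every fitted pipeline so the per-degree fits can be plotted later. If only the best degree were needed, a grid search over the pipeline's degree parameter would be a more compact alternative; a minimal sketch, assuming the same data and step names as above:

from sklearn.model_selection import GridSearchCV

param_grid = {'polynomial_features__degree': list(range(1, 16))}
search = GridSearchCV(Pipeline([('polynomial_features', PolynomialFeatures(include_bias=False)),
                                ('linear_regression', LinearRegression())]),
                      param_grid, scoring='neg_mean_squared_error', cv=10)
search.fit(X[:, np.newaxis], y)
print(search.best_params_, -search.best_score_)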
In [42]:
height = 4
width = 4
n = 0
fig, axs = plt.subplots(height, width)
for h in range(height):
    for w in range(width):
        ax = axs[h, w]
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        if h == w == 0:
            # top-left panel shows the raw samples; the rest show fits of increasing degree
            ax.set_title('Raw data', fontsize=10)
            ax.scatter(X, y, color='teal', s=7)
        else:
            p = pipelines[n]
            n += 1
            ax.set_title('Degree {}'.format(n), fontsize=10)
            ax.plot(X, p.predict(X[:, np.newaxis]), color='teal')
print('Polynomial fits of increasing degree (plus the raw data)')
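Each panel above evaluates its fit only at the 30 sampled x positions, so the high-degree curves can look tamer than they really are between samples. Evaluating on the dense grid makes the oscillations visible; a small sketch reusing continuous_X and the highest-degree pipeline:

p = pipelines[-1]   # the degree-15 fit
plt.plot(continuous_X, p.predict(continuous_X[:, np.newaxis]), color='teal', label='Degree 15 on dense grid')
plt.scatter(X, y, color='cornflowerblue', label='Samples')
plt.legend()
plt.show()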
In [43]:
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
l1, = ax1.plot(degrees, mses, label='MSE', color='cornflowerblue')
l2, = ax2.plot(degrees, stds, label='Sigma', color='teal')
ax1.set_yscale('log')
ax2.set_yscale('log')
ax1.set_xlabel('Degree')
ax1.set_ylabel('Mean sq error', color='cornflowerblue')
ax2.set_ylabel('Std deviation', color='teal')
ax1.legend(handles=[l1, l2], loc='upper left')   # combine labels from both axes into one legend
print('Cross-validated error and its spread as the polynomial degree varies')
In [44]:
for d, m, s in zip(degrees, mses, stds):
    print('Degree {:>2}: mse = {:16.3f}, std = {:16.3f}'.format(d, m, s))
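The degree the selection loop settled on can also be read back from the winning pipeline; a small sketch using the step name defined above:

best_degree = best_model.named_steps['polynomial_features'].degree
print('Best degree: {} (CV MSE = {:.3f})'.format(best_degree, min_mse))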
In [45]:
plt.plot(continuous_X, true_fn(continuous_X),
         c='aquamarine', label='Signal')
plt.plot(X, best_model.predict(X[:, np.newaxis]),
         c='teal', linestyle='-.', label='Model')
plt.scatter(X, y, label='Samples')
print('Signal, samples and best model')
plt.legend()
plt.show()
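The dash-dotted model curve above is drawn only at the 30 sample positions. Evaluating the best pipeline on the dense grid gives a smoother curve and a rough measure of how far it strays from the noiseless signal; a minimal sketch, assuming best_model and continuous_X from the earlier cells:

dense_pred = best_model.predict(continuous_X[:, np.newaxis])
approx_mse = np.mean((dense_pred - true_fn(continuous_X)) ** 2)   # error against the noiseless signal
plt.plot(continuous_X, true_fn(continuous_X), c='aquamarine', label='Signal')
plt.plot(continuous_X, dense_pred, c='teal', linestyle='-.', label='Model (dense grid)')
plt.legend()
plt.show()
print('MSE against the true signal: {:.4f}'.format(approx_mse))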