In [1]:
import numpy as np
import scipy.stats as st
import sklearn.linear_model as lm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def f(x):
    """Return the quadratic 3*x*x + 100 evaluated at ``x``.

    Only ``*`` and ``+`` are applied, so ``x`` may be a plain number or a
    NumPy array (evaluated element-wise).
    """
    return 3 * x * x + 100

In [3]:
# Dense, evenly spaced grid of 1000 points on [0, 10] and the exact value of
# f at each of them.
x_tr = np.linspace(start=0, stop=10, num=1000)
y_tr = f(x_tr)

In [4]:
# Sample locations: the 11 integers 0, 1, ..., 10.
x = np.arange(11)

In [5]:
# Noisy observations: f(x) plus one standard-normal draw per sample.
# The generator is created with a fixed seed inside this cell, so re-running
# the cell (or the whole notebook after a kernel restart) yields the same y.
# Drop the noise term to obtain the exact, noise-free targets instead.
rng = np.random.default_rng(seed=0)
y = f(x) + rng.standard_normal(x.shape)

In [6]:
# Noise-free reference curve f(x) over the whole dense grid, as a black
# dashed line. The full arrays are passed (no hard-coded slice) so the curve
# is never truncated if the grid resolution changes.
plt.plot(x_tr, y_tr, '--k')

# Noisy training samples (x, y) as black circles with a marker size of 7.
plt.plot(x, y, 'ok', ms=7)


Out[6]:
[<matplotlib.lines.Line2D at 0x7f32acdef160>]

In [7]:
# Create a linear-regression model and fit it to the noisy samples.
# The 1-D inputs are reshaped into a single-column 2-D array for the model.
lr = lm.LinearRegression()
lr.fit(x.reshape(-1, 1), y)

# Predicted values (y-hat) of the fitted model on the dense grid.
y_lr = lr.predict(x_tr.reshape(-1, 1))

In [8]:
# Noise-free reference curve f(x): black dashed line.
plt.plot(x_tr, y_tr, '--k')
# Prediction of the linear-regression model on the dense grid: green line.
plt.plot(x_tr, y_lr, 'g')
# Training samples, drawn last so they sit on top: large black circles.
plt.plot(x, y, 'ok', markersize=10)

# Restrict the view to the sampled x-range, with one unit of padding in y.
plt.ylim(bottom=y.min() - 1, top=y.max() + 1)
plt.xlim(left=0, right=10)
plt.title("Linear Regression")


Out[8]:
Text(0.5, 1.0, 'Linear Regression')

In [9]:
# Fit polynomials of degree 1..6 with a linear-regression model and overlay
# every fitted curve on the noise-free reference (black dashed line).
lrp = lm.LinearRegression()
plt.plot(x_tr, y_tr, '--k')
for deg in range(1, 7):
    # np.vander(v, deg + 1) builds the columns v**deg, ..., v**1, v**0, so the
    # coefficients printed below run from the highest power down to the
    # constant column.
    lrp.fit(np.vander(x, deg + 1), y)
    y_lrp = lrp.predict(np.vander(x_tr, deg + 1))
    plt.plot(x_tr, y_lrp, label='degree ' + str(deg))
    # Print the model's coefficients.
    print(' '.join(['%.2f' % c for c in lrp.coef_]))

# Legend and axis limits do not depend on the degree: set them once, after
# all labelled curves exist, instead of on every iteration.
plt.legend(loc=2)
plt.xlim(0, 10)
plt.ylim(0, 1000)

# Training samples on top, as large black circles.
plt.plot(x, y, 'ok', ms=10)
plt.title("Linear regression")


30.05 0.00
2.98 0.29 0.00
0.03 2.60 1.73 0.00
0.01 -0.08 3.28 0.37 0.00
0.00 -0.05 0.37 1.66 2.33 0.00
0.00 -0.01 0.08 -0.27 3.14 1.12 0.00
Out[9]:
Text(0.5, 1.0, 'Linear regression')

In [10]:
# Same polynomial comparison as for plain linear regression, but fitted with
# scikit-learn's RidgeCV estimator. Every fitted curve is overlaid on the
# noise-free reference (black dashed line).
ridge = lm.RidgeCV()
plt.plot(x_tr, y_tr, '--k')
for deg in range(1, 7):
    # np.vander(v, deg + 1) builds the columns v**deg, ..., v**1, v**0, so the
    # coefficients printed below run from the highest power down to the
    # constant column.
    ridge.fit(np.vander(x, deg + 1), y)
    y_ridge = ridge.predict(np.vander(x_tr, deg + 1))
    plt.plot(x_tr, y_ridge, label='degree ' + str(deg))
    # Print the model's coefficients.
    print(' '.join(['%.2f' % c for c in ridge.coef_]))

# Legend and axis limits do not depend on the degree: set them once, after
# all labelled curves exist, instead of on every iteration.
plt.legend(loc=2)
plt.xlim(0, 10)
plt.ylim(0, 1000)

# Training samples on top, as large black circles.
plt.plot(x, y, 'ok', ms=10)
plt.title("Ridge regression")


29.78 0.00
2.97 0.28 0.00
0.02 2.74 0.77 0.00
0.00 -0.07 3.19 0.54 0.00
0.00 -0.04 0.32 1.89 1.92 0.00
0.00 -0.00 -0.00 0.14 2.24 1.74 0.00
Out[10]:
Text(0.5, 1.0, 'Ridge regression')