In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [26]:
x = np.arange(0,9)
y = (x-4)**3 -8*(x-4)+ np.random.normal(0,10,x.shape[0])

sns.regplot(x, y, fit_reg=False)


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5a69366ba8>

In [27]:
X = x.reshape(x.shape[0],1)
X.shape


Out[27]:
(9, 1)

In [28]:
## underfitting
from sklearn import linear_model
clf = linear_model.LinearRegression()
clf.fit (X, y)

print(clf.coef_, clf.intercept_)

ax = sns.regplot(x, y, fit_reg=False)
plt.plot(x, X*clf.coef_+clf.intercept_)
ax.set_xlim(-1,9)
plt.title("degree=1 (underfit)", size=15)
plt.show()


[ 3.22572312] -10.2608775604

In [29]:
## Right fit
from sklearn import linear_model
X = np.array([x, x**2, x**3]).T

clf3 = linear_model.LinearRegression()
clf3.fit (X, y)

print(clf3.coef_, clf3.intercept_)

t3 = np.linspace(-0.5,8.5,num=100)
X3 = np.array([t3, t3**2, t3**3]).T

ax = sns.regplot(x, y, fit_reg=False)
plt.plot(t3, np.sum(X3*clf3.coef_, axis=1)+clf3.intercept_)
ax.set_xlim(-1,9)
plt.title("degree=3", size=15)
plt.show()


[ 35.51396667  -8.70796021   0.62500733] -32.0344753696

In [30]:
## Overfit
from sklearn import linear_model
X8 = np.array([x, x**2, x**3, x**4, x**5, x**6, x**7, x**8]).T

clf8 = linear_model.LinearRegression()
clf8.fit (X8, y)

print(clf.coef_, clf.intercept_)

t8 = np.linspace(-0.5,8.5,num=100)
X8 = np.array([t8, t8**2, t8**3, t8**4, t8**5, t8**6, t8**7, t8**8]).T

ax = sns.regplot(x, y, fit_reg=False)
plt.plot(t8, np.sum(X8*clf8.coef_, axis=1)+clf8.intercept_)
ax.set_xlim(-1,9)
plt.title("degree=8 (overfit)", size=15)
plt.show()


[ 3.22572312] -10.2608775604

In [24]:
fig = plt.figure(figsize=(18,5))
ax1 = fig.add_subplot(131)
ax1.scatter(x,y, s=30)
ax1.plot(x, x*clf.coef_+clf.intercept_, color='g', linewidth=4)
ax1.set_xlim(-1,9)
ax1.set_title("degree=1 (underfit)", size=20)

ax2 = fig.add_subplot(132)
ax2.scatter(x, y, s=30)
ax2.plot(t3, np.sum(X3*clf3.coef_, axis=1)+clf3.intercept_, 
         color='g', linewidth=4)
ax2.set_xlim(-1,9)
ax2.set_title("degree=3", size=20)

ax3=fig.add_subplot(133)
ax3.scatter(x, y, s=30)
ax3.plot(t8, np.sum(X8*clf8.coef_, axis=1)+clf8.intercept_, 
        color='g', linewidth=4)
ax3.set_xlim(-1,9)
ax3.set_title("degree=8 (overfit)", size=20)


plt.show()



In [ ]:

tips = sns.load_dataset("tips") tips


In [ ]: