Underfitted Vs Overfitted Model


In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

In [ ]:
%matplotlib inline

In [ ]:
# Load the dataset from the CSV in the working directory; later cells read
# its `feature` and `target` columns.
data = pd.read_csv(filepath_or_buffer="under_over_fitting.csv")

In [ ]:
# Polynomial feature expanders: degree 3 backs the "good" model, degree 5 the
# "bad" one.
good_poly_features, bad_poly_features = (
    PolynomialFeatures(degree=degree) for degree in (3, 5)
)

# One independent LinearRegression estimator per model being compared:
# raw feature, degree-3 expansion, degree-5 expansion.
linear_reg_simple, linear_reg_good_poly, linear_reg_bad_poly = (
    LinearRegression() for _ in range(3)
)

In [ ]:
# Turn the `feature` column into a single-column 2-D array (one row per
# sample) and keep the `target` column as a flat array.
features = np.array(data["feature"]).reshape(-1, 1)
target = np.array(data["target"])

In [ ]:
# Fit both polynomial expanders on the same data so that `transform` can be
# called on them in the following cells.
good_poly_features.fit(X=features, y=target)
bad_poly_features.fit(X=features, y=target)

In [ ]:
# Train the three regressors against the same target:
#   - simple:    the raw single feature
#   - good poly: the degree-3 polynomial expansion of the feature
#   - bad poly:  the degree-5 polynomial expansion of the feature
linear_reg_simple = linear_reg_simple.fit(X=features, y=target)
linear_reg_good_poly = linear_reg_good_poly.fit(
    X=good_poly_features.transform(features),
    y=target,
)
linear_reg_bad_poly = linear_reg_bad_poly.fit(
    X=bad_poly_features.transform(features),
    y=target,
)

In [ ]:
# Predict on the training features themselves; each model receives the same
# representation of the feature it was fitted on.
linear_reg_simple_prediction = linear_reg_simple.predict(X=features)
linear_reg_good_poly_prediction = linear_reg_good_poly.predict(
    X=good_poly_features.transform(features)
)
linear_reg_bad_poly_prediction = linear_reg_bad_poly.predict(
    X=bad_poly_features.transform(features)
)

In [ ]:
# Display both polynomial models' predictions as a (bad, good) pair for a
# quick side-by-side look.
(
    linear_reg_bad_poly_prediction,
    linear_reg_good_poly_prediction,
)

In [ ]:
# Compare all three fitted models against the raw data on one set of axes.
#
# plt.plot joins points in the order they appear in the arrays, so the curves
# are drawn over the feature values sorted ascending; with unsorted rows the
# lines would zig-zag back and forth instead of tracing each model's curve.
order = np.argsort(features[:, 0])
sorted_features = features[order]

plt.scatter(features, target, label="actual")
plt.plot(sorted_features, linear_reg_simple_prediction[order], label="Underfitted model")
plt.plot(sorted_features, linear_reg_good_poly_prediction[order], label="Good model")
plt.plot(sorted_features, linear_reg_bad_poly_prediction[order], label="Overfitted model")
plt.legend(loc="best")

plt.show()

In [ ]:
# Plot the degree-5 polynomial model on its own against the raw data.
#
# The curve drawn here is `linear_reg_bad_poly_prediction`, so the legend entry
# is "Overfitted model" (the label used for this same curve in the combined
# plot) rather than "Good model", which mislabelled it.
# NOTE(review): if the intent was to show the good model alone, plot
# `linear_reg_good_poly_prediction` here instead — confirm with the author.
#
# plt.plot joins points in array order, so the curve is drawn over the feature
# values sorted ascending to avoid a zig-zag line when rows are unsorted.
order = np.argsort(features[:, 0])
sorted_features = features[order]

plt.scatter(features, target, label="actual")
plt.plot(sorted_features, linear_reg_bad_poly_prediction[order], label="Overfitted model")
plt.legend(loc="best")

plt.show()