In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
In [ ]:
%matplotlib inline
In [ ]:
# Load the sample dataset (path is relative to the working directory); later
# cells read its `feature` and `target` columns.
data = pd.read_csv(filepath_or_buffer="under_over_fitting.csv")
In [ ]:
# Polynomial feature expanders: degree 3 for the model this notebook treats as
# the good fit, degree 5 for the one it treats as overfitted.
good_poly_features = PolynomialFeatures(degree=3)
bad_poly_features = PolynomialFeatures(degree=5)

# One independent, still-unfitted regressor per model being compared.
linear_reg_simple, linear_reg_good_poly, linear_reg_bad_poly = (
    LinearRegression() for _ in range(3)
)
In [ ]:
# The single input column is reshaped into an (n_samples, 1) column vector,
# the 2-D layout the scikit-learn estimators below are given as X.
features = np.array(data["feature"]).reshape(-1, 1)
# The regression target stays one-dimensional.
target = np.array(data["target"])
In [ ]:
# Fit both polynomial expanders on the inputs so that `transform` can be
# called on them in the following cells.
good_poly_features.fit(X=features, y=target)
bad_poly_features.fit(X=features, y=target)
In [ ]:
# Expand the raw feature into polynomial terms for the two polynomial models.
good_poly_inputs = good_poly_features.transform(features)
bad_poly_inputs = bad_poly_features.transform(features)

# Train the three regressors: raw feature, degree-3 terms, degree-5 terms.
# The result of fit() is bound back to the same name in each case.
linear_reg_simple = linear_reg_simple.fit(features, target)
linear_reg_good_poly = linear_reg_good_poly.fit(good_poly_inputs, target)
linear_reg_bad_poly = linear_reg_bad_poly.fit(bad_poly_inputs, target)
In [ ]:
# Predict on the training inputs themselves; each polynomial model receives
# the same expanded representation it was trained on.
good_poly_inputs = good_poly_features.transform(features)
bad_poly_inputs = bad_poly_features.transform(features)

linear_reg_simple_prediction = linear_reg_simple.predict(features)
linear_reg_good_poly_prediction = linear_reg_good_poly.predict(good_poly_inputs)
linear_reg_bad_poly_prediction = linear_reg_bad_poly.predict(bad_poly_inputs)
In [ ]:
# Echo the degree-5 and degree-3 predictions as the cell output for inspection.
(linear_reg_bad_poly_prediction, linear_reg_good_poly_prediction)
In [ ]:
# Compare the three fitted models against the observed data on one figure.
#
# plt.plot joins consecutive points in array order, so the curves are drawn in
# ascending feature order; otherwise rows that are not already sorted in the
# CSV would make the lines zigzag back and forth across the figure.
order = np.argsort(features[:, 0])
sorted_features = features[order]

# The scatter is order-independent, so the raw arrays are used as-is.
plt.scatter(features, target, label="actual")
plt.plot(sorted_features, linear_reg_simple_prediction[order], label="Underfitted model")
plt.plot(sorted_features, linear_reg_good_poly_prediction[order], label="Good model")
plt.plot(sorted_features, linear_reg_bad_poly_prediction[order], label="Overfitted model")
plt.legend(loc="best")
plt.show()
In [ ]:
# Show the good (degree-3) model on its own against the observed data.
#
# The curve labelled "Good model" previously plotted the degree-5
# (overfitted) prediction; it now plots the degree-3 prediction so the legend
# matches the data shown.
#
# plt.plot joins consecutive points in array order, so the curve is drawn in
# ascending feature order to avoid zigzag lines when the rows are unsorted.
order = np.argsort(features[:, 0])

plt.scatter(features, target, label="actual")
plt.plot(features[order], linear_reg_good_poly_prediction[order], label="Good model")
plt.legend(loc="best")
plt.show()