In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Generate 30 noisy samples from y = cos(1.5*pi*x) on [0, 1].
np.random.seed(0)
n_samples = 30
X = np.sort(np.random.rand(n_samples))
y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1

dfX = pd.DataFrame(X, columns=["x"])
dfy = pd.DataFrame(y, columns=["y"])
df = pd.concat([dfX, dfy], axis=1)

# Fit polynomials of increasing degree. The formula API adds the
# intercept itself, so sm.add_constant is not needed here.
print(sm.OLS.from_formula("y ~ x", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2)", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2) + I(x**3)", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2) + I(x**3) + I(x**4)", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2) + I(x**3) + I(x**4) + I(x**5)", data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.474
Model: OLS Adj. R-squared: 0.455
Method: Least Squares F-statistic: 25.20
Date: Wed, 08 Jun 2016 Prob (F-statistic): 2.63e-05
Time: 06:14:50 Log-Likelihood: -20.253
No. Observations: 30 AIC: 44.51
Df Residuals: 28 BIC: 47.31
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 0.5367 0.207 2.595 0.015 0.113 0.960
x -1.6093 0.321 -5.020 0.000 -2.266 -0.953
==============================================================================
Omnibus: 5.427 Durbin-Watson: 0.202
Prob(Omnibus): 0.066 Jarque-Bera (JB): 2.523
Skew: 0.425 Prob(JB): 0.283
Kurtosis: 1.862 Cond. No. 4.85
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.931
Model: OLS Adj. R-squared: 0.926
Method: Least Squares F-statistic: 181.6
Date: Wed, 08 Jun 2016 Prob (F-statistic): 2.19e-16
Time: 06:14:50 Log-Likelihood: 10.181
No. Observations: 30 AIC: -14.36
Df Residuals: 27 BIC: -10.16
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.5411 0.107 14.378 0.000 1.321 1.761
x -7.3196 0.444 -16.498 0.000 -8.230 -6.409
I(x ** 2) 5.5596 0.416 13.355 0.000 4.705 6.414
==============================================================================
Omnibus: 3.753 Durbin-Watson: 0.889
Prob(Omnibus): 0.153 Jarque-Bera (JB): 1.543
Skew: 0.062 Prob(JB): 0.462
Kurtosis: 1.896 Cond. No. 23.0
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.971
Model: OLS Adj. R-squared: 0.968
Method: Least Squares F-statistic: 289.3
Date: Wed, 08 Jun 2016 Prob (F-statistic): 4.40e-20
Time: 06:14:50 Log-Likelihood: 23.183
No. Observations: 30 AIC: -38.37
Df Residuals: 26 BIC: -32.76
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.1694 0.094 12.418 0.000 0.976 1.363
x -2.1962 0.904 -2.428 0.022 -4.055 -0.337
I(x ** 2) -7.0567 2.125 -3.321 0.003 -11.424 -2.689
I(x ** 3) 8.2029 1.370 5.988 0.000 5.387 11.018
==============================================================================
Omnibus: 2.444 Durbin-Watson: 1.982
Prob(Omnibus): 0.295 Jarque-Bera (JB): 1.471
Skew: 0.530 Prob(JB): 0.479
Kurtosis: 3.228 Cond. No. 160.
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.973
Model: OLS Adj. R-squared: 0.969
Method: Least Squares F-statistic: 225.7
Date: Wed, 08 Jun 2016 Prob (F-statistic): 3.17e-19
Time: 06:14:50 Log-Likelihood: 24.329
No. Observations: 30 AIC: -38.66
Df Residuals: 25 BIC: -31.65
Df Model: 4
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.0311 0.135 7.645 0.000 0.753 1.309
x 0.4675 2.089 0.224 0.825 -3.835 4.770
I(x ** 2) -17.7895 7.900 -2.252 0.033 -34.060 -1.519
I(x ** 3) 23.5927 11.008 2.143 0.042 0.921 46.265
I(x ** 4) -7.2629 5.156 -1.409 0.171 -17.882 3.357
==============================================================================
Omnibus: 0.929 Durbin-Watson: 2.192
Prob(Omnibus): 0.628 Jarque-Bera (JB): 0.696
Skew: 0.362 Prob(JB): 0.706
Kurtosis: 2.817 Cond. No. 914.
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.974
Model: OLS Adj. R-squared: 0.969
Method: Least Squares F-statistic: 182.9
Date: Wed, 08 Jun 2016 Prob (F-statistic): 2.73e-18
Time: 06:14:51 Log-Likelihood: 25.117
No. Observations: 30 AIC: -38.23
Df Residuals: 24 BIC: -29.83
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.1269 0.158 7.118 0.000 0.800 1.454
x -2.1789 3.118 -0.699 0.491 -8.615 4.257
I(x ** 2) 0.8313 18.154 0.046 0.964 -36.637 38.299
I(x ** 3) -26.1798 45.097 -0.581 0.567 -119.255 66.895
I(x ** 4) 48.6674 49.427 0.985 0.335 -53.346 150.681
I(x ** 5) -22.3170 19.616 -1.138 0.266 -62.802 18.168
==============================================================================
Omnibus: 1.531 Durbin-Watson: 2.272
Prob(Omnibus): 0.465 Jarque-Bera (JB): 1.042
Skew: 0.455 Prob(JB): 0.594
Kurtosis: 2.936 Cond. No. 4.62e+03
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.62e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
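Rather than eyeballing five summaries, the same comparison can be made programmatically. A minimal sketch (reusing df from the cell above; the loop and the output formatting are not in the original notebook) that collects in-sample R-squared, adjusted R-squared, AIC, and BIC per degree:

# Sketch: tabulate fit statistics for the five polynomial models above.
formula = "y ~ x"
print("degree   R2      adj R2    AIC       BIC")
for degree in range(1, 6):
    result = sm.OLS.from_formula(formula, data=df).fit()
    print("{:d}  {:7.3f}  {:7.3f}  {:8.2f}  {:8.2f}".format(
        degree, result.rsquared, result.rsquared_adj, result.aic, result.bic))
    # Extend the formula with the next power for the following iteration.
    formula += " + I(x**{})".format(degree + 1)

In the summaries above, R-squared rises monotonically with the degree, but BIC is smallest for the cubic model and AIC for the quartic; beyond that, both criteria penalize the extra terms.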
In [8]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

def polyreg(degree, seed=0, plot=True):
    # Chain the polynomial feature expansion and the linear fit.
    polynomial_features = PolynomialFeatures(degree=degree)
    linear_regression = LinearRegression()
    model = Pipeline([
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ])

    # Same data-generating process as the statsmodels cell above.
    np.random.seed(seed)
    n_samples = 30
    X = np.sort(np.random.rand(n_samples))
    Y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1
    X = X[:, np.newaxis]

    model.fit(X, Y)

    if plot:
        plt.scatter(X, Y)
        xx = np.linspace(0, 1, 1000)
        plt.plot(xx, model.predict(xx[:, np.newaxis]))
        plt.ylim(-2, 2)
        plt.show()

    reg = model.named_steps["linear_regression"]
    return reg.coef_, reg.intercept_
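A note on the design: the Pipeline first expands X into the columns [1, x, x**2, ..., x**degree] via PolynomialFeatures and then fits ordinary least squares on them. Because LinearRegression fits its own intercept by default, the bias column produced by PolynomialFeatures ends up with a coefficient of 0, which is why the first entry of reg.coef_ is 0 in the outputs below.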
In [9]:
plt.subplot(251)
polyreg(1)
Out[9]:
(array([ 0. , -1.60931179]), 0.53668033031787343)
In [10]:
plt.subplot(252)
polyreg(2)
Out[10]:
(array([ 0. , -7.31956683, 5.55955392]), 1.5411486872392575)
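The in-sample fit alone cannot choose the degree: the statsmodels summaries show R-squared still creeping upward at degree 5 while every coefficient becomes insignificant and the condition number explodes. A minimal sketch of out-of-sample degree selection with cross-validation (the sklearn.model_selection import path, the 5-fold split, and the scoring choice are assumptions, not part of the original notebook):

# Sketch: score each polynomial degree out of sample.
from sklearn.model_selection import cross_val_score

np.random.seed(0)
n_samples = 30
X = np.sort(np.random.rand(n_samples))
Y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1
X = X[:, np.newaxis]

for degree in range(1, 6):
    model = Pipeline([
        ("polynomial_features", PolynomialFeatures(degree=degree)),
        ("linear_regression", LinearRegression()),
    ])
    # cross_val_score returns negated MSE for this scorer; flip the sign.
    mse = -cross_val_score(model, X, Y,
                           scoring="neg_mean_squared_error", cv=5).mean()
    print(degree, mse)

One expects a low degree to underfit (error dominated by bias) and a high degree to overfit (error dominated by variance), with the cross-validated error minimized at an intermediate degree.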