In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Generate 30 noisy samples from y = cos(1.5*pi*x) on [0, 1].
np.random.seed(0)
n_samples = 30
X = np.sort(np.random.rand(n_samples))
y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1

dfX = pd.DataFrame(X, columns=["x"])
dfy = pd.DataFrame(y, columns=["y"])
df = pd.concat([dfX, dfy], axis=1)

# Fit polynomials of increasing degree. The formula API adds the
# intercept itself, so sm.add_constant is not needed here.
print(sm.OLS.from_formula("y ~ x", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2)", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2) + I(x**3)", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2) + I(x**3) + I(x**4)", data=df).fit().summary())
print("==========================================")
print(sm.OLS.from_formula("y ~ x + I(x**2) + I(x**3) + I(x**4) + I(x**5)", data=df).fit().summary())
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.474
Model: OLS Adj. R-squared: 0.455
Method: Least Squares F-statistic: 25.20
Date: Wed, 08 Jun 2016 Prob (F-statistic): 2.63e-05
Time: 06:14:50 Log-Likelihood: -20.253
No. Observations: 30 AIC: 44.51
Df Residuals: 28 BIC: 47.31
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 0.5367 0.207 2.595 0.015 0.113 0.960
x -1.6093 0.321 -5.020 0.000 -2.266 -0.953
==============================================================================
Omnibus: 5.427 Durbin-Watson: 0.202
Prob(Omnibus): 0.066 Jarque-Bera (JB): 2.523
Skew: 0.425 Prob(JB): 0.283
Kurtosis: 1.862 Cond. No. 4.85
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.931
Model: OLS Adj. R-squared: 0.926
Method: Least Squares F-statistic: 181.6
Date: Wed, 08 Jun 2016 Prob (F-statistic): 2.19e-16
Time: 06:14:50 Log-Likelihood: 10.181
No. Observations: 30 AIC: -14.36
Df Residuals: 27 BIC: -10.16
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.5411 0.107 14.378 0.000 1.321 1.761
x -7.3196 0.444 -16.498 0.000 -8.230 -6.409
I(x ** 2) 5.5596 0.416 13.355 0.000 4.705 6.414
==============================================================================
Omnibus: 3.753 Durbin-Watson: 0.889
Prob(Omnibus): 0.153 Jarque-Bera (JB): 1.543
Skew: 0.062 Prob(JB): 0.462
Kurtosis: 1.896 Cond. No. 23.0
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.971
Model: OLS Adj. R-squared: 0.968
Method: Least Squares F-statistic: 289.3
Date: Wed, 08 Jun 2016 Prob (F-statistic): 4.40e-20
Time: 06:14:50 Log-Likelihood: 23.183
No. Observations: 30 AIC: -38.37
Df Residuals: 26 BIC: -32.76
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.1694 0.094 12.418 0.000 0.976 1.363
x -2.1962 0.904 -2.428 0.022 -4.055 -0.337
I(x ** 2) -7.0567 2.125 -3.321 0.003 -11.424 -2.689
I(x ** 3) 8.2029 1.370 5.988 0.000 5.387 11.018
==============================================================================
Omnibus: 2.444 Durbin-Watson: 1.982
Prob(Omnibus): 0.295 Jarque-Bera (JB): 1.471
Skew: 0.530 Prob(JB): 0.479
Kurtosis: 3.228 Cond. No. 160.
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.973
Model: OLS Adj. R-squared: 0.969
Method: Least Squares F-statistic: 225.7
Date: Wed, 08 Jun 2016 Prob (F-statistic): 3.17e-19
Time: 06:14:50 Log-Likelihood: 24.329
No. Observations: 30 AIC: -38.66
Df Residuals: 25 BIC: -31.65
Df Model: 4
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.0311 0.135 7.645 0.000 0.753 1.309
x 0.4675 2.089 0.224 0.825 -3.835 4.770
I(x ** 2) -17.7895 7.900 -2.252 0.033 -34.060 -1.519
I(x ** 3) 23.5927 11.008 2.143 0.042 0.921 46.265
I(x ** 4) -7.2629 5.156 -1.409 0.171 -17.882 3.357
==============================================================================
Omnibus: 0.929 Durbin-Watson: 2.192
Prob(Omnibus): 0.628 Jarque-Bera (JB): 0.696
Skew: 0.362 Prob(JB): 0.706
Kurtosis: 2.817 Cond. No. 914.
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
==========================================
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.974
Model: OLS Adj. R-squared: 0.969
Method: Least Squares F-statistic: 182.9
Date: Wed, 08 Jun 2016 Prob (F-statistic): 2.73e-18
Time: 06:14:51 Log-Likelihood: 25.117
No. Observations: 30 AIC: -38.23
Df Residuals: 24 BIC: -29.83
Df Model: 5
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.1269 0.158 7.118 0.000 0.800 1.454
x -2.1789 3.118 -0.699 0.491 -8.615 4.257
I(x ** 2) 0.8313 18.154 0.046 0.964 -36.637 38.299
I(x ** 3) -26.1798 45.097 -0.581 0.567 -119.255 66.895
I(x ** 4) 48.6674 49.427 0.985 0.335 -53.346 150.681
I(x ** 5) -22.3170 19.616 -1.138 0.266 -62.802 18.168
==============================================================================
Omnibus: 1.531 Durbin-Watson: 2.272
Prob(Omnibus): 0.465 Jarque-Bera (JB): 1.042
Skew: 0.455 Prob(JB): 0.594
Kurtosis: 2.936 Cond. No. 4.62e+03
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.62e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
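Rather than eyeballing five summaries, the same comparison can be made programmatically. A minimal sketch (reusing df from the cell above; the loop and the output formatting are not in the original notebook) that collects in-sample R-squared, adjusted R-squared, AIC, and BIC per degree:

# Sketch: tabulate fit statistics for the five polynomial models above.
formula = "y ~ x"
print("degree   R2      adj R2    AIC       BIC")
for degree in range(1, 6):
    result = sm.OLS.from_formula(formula, data=df).fit()
    print("{:d}  {:7.3f}  {:7.3f}  {:8.2f}  {:8.2f}".format(
        degree, result.rsquared, result.rsquared_adj, result.aic, result.bic))
    # Extend the formula with the next power for the following iteration.
    formula += " + I(x**{})".format(degree + 1)

In the summaries above, R-squared rises monotonically with the degree, but BIC is smallest for the cubic model and AIC for the quartic; beyond that, both criteria penalize the extra terms.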
In [8]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

def polyreg(degree, seed=0, plot=True):
    # Chain the polynomial feature expansion and the linear fit.
    polynomial_features = PolynomialFeatures(degree=degree)
    linear_regression = LinearRegression()
    model = Pipeline([
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ])

    # Same data-generating process as the statsmodels cell above.
    np.random.seed(seed)
    n_samples = 30
    X = np.sort(np.random.rand(n_samples))
    Y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1
    X = X[:, np.newaxis]

    model.fit(X, Y)

    if plot:
        plt.scatter(X, Y)
        xx = np.linspace(0, 1, 1000)
        plt.plot(xx, model.predict(xx[:, np.newaxis]))
        plt.ylim(-2, 2)
        plt.show()

    reg = model.named_steps["linear_regression"]
    return reg.coef_, reg.intercept_
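A note on the design: the Pipeline first expands X into the columns [1, x, x**2, ..., x**degree] via PolynomialFeatures and then fits ordinary least squares on them. Because LinearRegression fits its own intercept by default, the bias column produced by PolynomialFeatures ends up with a coefficient of 0, which is why the first entry of reg.coef_ is 0 in the outputs below.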
In [9]:
plt.subplot(251)
polyreg(1)
Out[9]:
(array([ 0. , -1.60931179]), 0.53668033031787343)
In [10]:
plt.subplot(252)
polyreg(2)
Out[10]:
(array([ 0. , -7.31956683, 5.55955392]), 1.5411486872392575)
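The in-sample fit alone cannot choose the degree: the statsmodels summaries show R-squared still creeping upward at degree 5 while every coefficient becomes insignificant and the condition number explodes. A minimal sketch of out-of-sample degree selection with cross-validation (the sklearn.model_selection import path, the 5-fold split, and the scoring choice are assumptions, not part of the original notebook):

# Sketch: score each polynomial degree out of sample.
from sklearn.model_selection import cross_val_score

np.random.seed(0)
n_samples = 30
X = np.sort(np.random.rand(n_samples))
Y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1
X = X[:, np.newaxis]

for degree in range(1, 6):
    model = Pipeline([
        ("polynomial_features", PolynomialFeatures(degree=degree)),
        ("linear_regression", LinearRegression()),
    ])
    # cross_val_score returns negated MSE for this scorer; flip the sign.
    mse = -cross_val_score(model, X, Y,
                           scoring="neg_mean_squared_error", cv=5).mean()
    print(degree, mse)

One expects a low degree to underfit (error dominated by bias) and a high degree to overfit (error dominated by variance), with the cross-validated error minimized at an intermediate degree.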