notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import numpy as np
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import seaborn as sns



In [2]:

    
# Seed NumPy's global random state so a fresh "Restart & Run All" reproduces
# the same sample. stats.norm.rvs is called without random_state below, so it
# draws from that same global state; one seed call therefore covers both the
# x sampling and the two noise vectors.
SEED = 42
np.random.seed(SEED)

# true parameters of the simulated line: y = b0 + b1*x + noise
b0 = 0.5
b1 = 0.1
# 1000 evenly spaced candidate values from 1 to 100 (both endpoints included)
x = np.linspace(1, 100, 1000)
# draw 100 of the candidates at random, with replacement (values may repeat)
x = np.random.choice(x, 100, replace=True)
# zero-mean Gaussian noise: noise1 has std 10 (noisy), noise2 has std 1 (clean)
noise1 = stats.norm.rvs(loc=0.0, scale=10.0, size=len(x))
noise2 = stats.norm.rvs(loc=0.0, scale=1.0, size=len(x))
# same underlying line, two different noise levels
y1 = b0 + b1*x + noise1
y2 = b0 + b1*x + noise2
df = pd.DataFrame({'x': x, 'y1': y1, 'y2': y2})



In [3]:

    
# Joint regression plot of the high-noise response y1 against x.
# The data variables are passed as x=/y= keywords and the figure size as
# `height`: recent seaborn releases no longer accept positional x/y here and
# renamed the old `size` argument to `height` (requires seaborn >= 0.9).
sns.jointplot(x='x', y='y1', data=df, kind='reg', xlim=(-5, 105), color="r", height=7)









    Out[3]:





<seaborn.axisgrid.JointGrid at 0x10cd74e80>



In [4]:

    
# Ordinary least squares fit of y1 on x (formula interface adds the intercept).
model = smf.ols(
    formula='y1 ~ x',
    data=df,
).fit()
# last expression of the cell: render the regression summary table
model.summary()









    Out[4]:





OLS Regression Results

  Dep. Variable:            y1           R-squared:             0.044


  Model:                    OLS          Adj. R-squared:        0.034


  Method:              Least Squares     F-statistic:           4.497


  Date:              Sat, 06 May 2017    Prob (F-statistic):   0.0365 


  Time:                  10:41:13        Log-Likelihood:      -371.18


  No. Observations:          100         AIC:                   746.4


  Df Residuals:               98         BIC:                   751.6


  Df Model:                    1                                     


  Covariance Type:       nonrobust                                   




               coef      std err       t       P>|t|   [0.025     0.975]  


  Intercept      2.3566      2.050      1.150   0.253     -1.712      6.425


  x              0.0738      0.035      2.121   0.036      0.005      0.143




  Omnibus:         4.639    Durbin-Watson:         1.885


  Prob(Omnibus):   0.098    Jarque-Bera (JB):      4.475


  Skew:           -0.518    Prob(JB):              0.107


  Kurtosis:        2.956    Cond. No.               121.



In [5]:

    
# Joint regression plot of the low-noise response y2 against x.
# The data variables are passed as x=/y= keywords and the figure size as
# `height`: recent seaborn releases no longer accept positional x/y here and
# renamed the old `size` argument to `height` (requires seaborn >= 0.9).
sns.jointplot(x='x', y='y2', data=df, kind='reg', xlim=(-5, 105), height=7)









    Out[5]:





<seaborn.axisgrid.JointGrid at 0x12036a4e0>



In [6]:

    
# Ordinary least squares fit of y2 on x (formula interface adds the intercept).
# NOTE: this rebinds `model`, replacing the fit stored under that name by the y1 cell.
model = smf.ols(
    formula='y2 ~ x',
    data=df,
).fit()
# last expression of the cell: render the regression summary table
model.summary()









    Out[6]:





OLS Regression Results

  Dep. Variable:            y2           R-squared:             0.876


  Model:                    OLS          Adj. R-squared:        0.875


  Method:              Least Squares     F-statistic:           694.7


  Date:              Sat, 06 May 2017    Prob (F-statistic):  2.80e-46


  Time:                  10:41:14        Log-Likelihood:      -145.38


  No. Observations:          100         AIC:                   294.8


  Df Residuals:               98         BIC:                   300.0


  Df Model:                    1                                     


  Covariance Type:       nonrobust                                   




               coef      std err       t       P>|t|   [0.025     0.975]  


  Intercept      0.7513      0.214      3.505   0.001      0.326      1.177


  x              0.0959      0.004     26.358   0.000      0.089      0.103




  Omnibus:         1.578    Durbin-Watson:         1.778


  Prob(Omnibus):   0.454    Jarque-Bera (JB):      1.623


  Skew:            0.253    Prob(JB):              0.444


  Kurtosis:        2.635    Cond. No.               121.



In [ ]:

Dep. Variable:	y1	R-squared:	0.044
Model:	OLS	Adj. R-squared:	0.034
Method:	Least Squares	F-statistic:	4.497
Date:	Sat, 06 May 2017	Prob (F-statistic):	0.0365
Time:	10:41:13	Log-Likelihood:	-371.18
No. Observations:	100	AIC:	746.4
Df Residuals:	98	BIC:	751.6
Df Model:	1
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	2.3566	2.050	1.150	0.253	-1.712	6.425
x	0.0738	0.035	2.121	0.036	0.005	0.143

Omnibus:	4.639	Durbin-Watson:	1.885
Prob(Omnibus):	0.098	Jarque-Bera (JB):	4.475
Skew:	-0.518	Prob(JB):	0.107
Kurtosis:	2.956	Cond. No.	121.

Dep. Variable:	y2	R-squared:	0.876
Model:	OLS	Adj. R-squared:	0.875
Method:	Least Squares	F-statistic:	694.7
Date:	Sat, 06 May 2017	Prob (F-statistic):	2.80e-46
Time:	10:41:14	Log-Likelihood:	-145.38
No. Observations:	100	AIC:	294.8
Df Residuals:	98	BIC:	300.0
Df Model:	1
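Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	0.7513	0.214	3.505	0.001	0.326	1.177
x	0.0959	0.004	26.358	0.000	0.089	0.103
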
Omnibus:	1.578	Durbin-Watson:	1.778
Prob(Omnibus):	0.454	Jarque-Bera (JB):	1.623
Skew:	0.253	Prob(JB):	0.444
Kurtosis:	2.635	Cond. No.	121.