notebook.community

Edit and run



In [1]:

    
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf



In [2]:

    
df = pd.read_csv("hanford.csv")



In [3]:

    
df.describe()



In [4]:

    
df.corr()









    Out[4]:






  
    
      
      Exposure
      Mortality
    
  
  
    
      Exposure
      1.000000
      0.926345
    
    
      Mortality
      0.926345
      1.000000



In [5]:

    
df.plot(kind='scatter',x='Exposure',y='Mortality')









    Out[5]:





<matplotlib.axes._subplots.AxesSubplot at 0x10c93a320>



In [6]:

    
lm = smf.ols(formula="Mortality~Exposure",data=df).fit()



In [7]:

    
lm.params









    Out[7]:





Intercept    114.715631
Exposure       9.231456
dtype: float64



In [8]:

    
intercept, slope = lm.params



In [9]:

    
df.plot(kind="scatter",x="Exposure",y="Mortality")
plt.plot(df["Mortality"],slope*df["Mortality"]+intercept,"-",color="red")









    Out[9]:





[<matplotlib.lines.Line2D at 0x10c8f64e0>]



In [10]:

    
lm.summary()









    



/Users/sz2472/.virtualenvs/pandas/lib/python3.5/site-packages/scipy/stats/stats.py:1535: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=9
  "anyway, n=%i" % int(n))






    Out[10]:





OLS Regression Results

  Dep. Variable:         Mortality       R-squared:             0.858


  Model:                    OLS          Adj. R-squared:        0.838


  Method:              Least Squares     F-statistic:           42.34


  Date:              Thu, 28 Jul 2016    Prob (F-statistic):  0.000332


  Time:                  10:32:37        Log-Likelihood:      -35.397


  No. Observations:            9         AIC:                   74.79


  Df Residuals:                7         BIC:                   75.19


  Df Model:                    1                                     


  Covariance Type:       nonrobust                                   




               coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  Intercept    114.7156      8.046     14.258   0.000     95.691   133.741


  Exposure       9.2315      1.419      6.507   0.000      5.877    12.586




  Omnibus:         2.914    Durbin-Watson:         1.542


  Prob(Omnibus):   0.233    Jarque-Bera (JB):      0.915


  Skew:           -0.030    Prob(JB):              0.633


  Kurtosis:        1.439    Cond. No.               9.97



In [13]:

    
coefficient_of_correlation=0.858
coefficient_of_correlation









    Out[13]:





0.858



In [14]:

    
def predicting_morality_rate(exposure):
    return float(exposure)*slope+intercept



In [15]:

    
predicting_morality_rate(10)









    Out[15]:





207.03019352841989



In [ ]:

	Exposure	Mortality
count	9.000000	9.000000
mean	4.617778	157.344444
std	3.491192	34.791346
min	1.250000	113.500000
25%	2.490000	130.100000
50%	3.410000	147.100000
75%	6.410000	177.900000
max	11.640000	210.300000

Dep. Variable:	Mortality	R-squared:	0.858
Model:	OLS	Adj. R-squared:	0.838
Method:	Least Squares	F-statistic:	42.34
Date:	Thu, 28 Jul 2016	Prob (F-statistic):	0.000332
Time:	10:32:37	Log-Likelihood:	-35.397
No. Observations:	9	AIC:	74.79
Df Residuals:	7	BIC:	75.19
Df Model:	1
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
Intercept	114.7156	8.046	14.258	0.000	95.691 133.741
Exposure	9.2315	1.419	6.507	0.000	5.877 12.586

Omnibus:	2.914	Durbin-Watson:	1.542
Prob(Omnibus):	0.233	Jarque-Bera (JB):	0.915
Skew:	-0.030	Prob(JB):	0.633
Kurtosis:	1.439	Cond. No.	9.97