In [2]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf # package we'll be using for linear regression
In [3]:
# Load the dataset from hanford.csv (path is relative to the notebook's working directory).
df = pd.read_csv('hanford.csv')
In [5]:
# Show summary statistics for the loaded data.
df.describe()
Out[5]:
In [6]:
# Pairwise correlation matrix of the columns (pandas default method).
# NOTE(review): on pandas >= 2.0, corr() no longer drops non-numeric columns by
# default; if hanford.csv has any text column this may need numeric_only=True — confirm.
df.corr()
Out[6]:
In [9]:
# Ordinary least squares fit; the formula is written response~predictor,
# so Mortality is regressed on Exposure.
lm = smf.ols(data=df, formula="Mortality~Exposure").fit()

# Unpack the two fitted coefficients; this relies on the intercept term
# coming first in lm.params, followed by the Exposure coefficient.
intercept, slope = lm.params

# Display the fitted coefficients as the cell output.
lm.params
Out[9]:
In [10]:
# Scatter of the observations, with the fitted regression line drawn in red
# on the same axes.
ax = df.plot.scatter(x="Exposure", y="Mortality")
fitted = slope * df["Exposure"] + intercept
ax.plot(df["Exposure"], fitted, "-", color="red")
Out[10]:
In [11]:
# Predict mortality at an exposure of 10 using the fitted line y = m*x + b,
# where m is the fitted slope and b is the fitted intercept from the regression cell.
# Both coefficients come from the model rather than being typed in by hand, so the
# prediction stays consistent with the plotted line if the fit changes.
exposure = 10
mortality = slope * exposure + intercept
In [12]:
# Display the value computed in the previous cell.
mortality
Out[12]:
In [ ]: