In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [4]:
# Load the Hanford CSV (relative path) into the data frame every later cell works on.
df = pd.read_csv("../data/hanford.csv")
# Preview the first rows as a quick check that the file parsed as expected.
df.head()
Out[4]:
In [5]:
# Descriptive statistics for the loaded data frame.
df.describe()
Out[5]:
In [6]:
# Pairwise Pearson correlations between the numeric columns.
# Numeric columns are selected explicitly: since pandas 2.0 `DataFrame.corr()`
# no longer drops non-numeric columns on its own and raises if the frame
# contains any text column (NOTE(review): the CSV schema is not visible here;
# this is a no-op when every column is already numeric).
df.select_dtypes(include="number").corr()
Out[6]:
In [7]:
# First look at the relationship: one point per row, Exposure on x, Mortality on y.
df.plot.scatter(x='Exposure', y='Mortality')
Out[7]:
In [13]:
# Answer to a question posed in the notebook's narrative (question text is not
# visible in this export — presumably whether the scatter plot above suggests
# a relationship between Exposure and Mortality; TODO confirm).
print('Yes.')
In [9]:
# Ordinary least squares fit of Mortality on Exposure via the formula API.
lm = smf.ols(
    "Mortality~Exposure",
    data=df,
).fit()
# Show the fitted coefficients.
lm.params
Out[9]:
In [10]:
# Pull the two fitted coefficients out of the parameter Series by label:
# "Intercept" is the constant term, "Exposure" is the coefficient on the predictor.
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [11]:
# Scatter of the observations with the fitted regression line overlaid.
# The line is drawn through the Axes returned by the scatter call, so both
# layers land on the same plot.
ax = df.plot.scatter(x='Exposure', y='Mortality', color='steelblue', linewidth=0)
ax.plot(df["Exposure"], intercept + slope * df["Exposure"], "-", color="red")
Out[11]:
In [12]:
# Full regression report for the fitted model.
lm.summary()
Out[12]:
In [14]:
# Report the coefficient of determination straight from the fitted model
# instead of a number copied by hand from the summary table, so the printed
# value cannot go stale if the data or the formula change.
print("R^2 equals to {:.3f}.".format(lm.rsquared))
In [16]:
# Point prediction from the fitted line at an exposure value of 10.
print("The mortality rate of exposure 10 is", intercept + slope * 10)
In [17]:
def get_mr(exposure):
    """Predict the mortality rate at a given exposure from the fitted line.

    Reads the notebook-level ``slope`` and ``intercept`` (taken from
    ``lm.params`` in an earlier cell), so that cell must have been run first.

    Parameters
    ----------
    exposure
        Value at which to evaluate the line; anything that supports
        ``exposure * slope + intercept``.

    Returns
    -------
    The value of ``exposure * slope + intercept``.
    """
    return exposure * slope + intercept
In [18]:
# Evaluate the helper at an exposure of 10; the result is shown as the cell output.
get_mr(exposure=10)
Out[18]:
In [ ]: