In [15]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [4]:
# Read the CSV into a DataFrame; the relative path is resolved against the
# kernel's current working directory.
csv_path = "hanford.csv"
df = pd.read_csv(csv_path)
In [13]:
# Preview the first rows of the loaded frame (head() shows 5 rows by default).
df.head()
Out[13]:
In [6]:
# Summary statistics of the frame's columns, shown as the cell output.
df.describe()
Out[6]:
In [7]:
# Pairwise correlation between the numeric columns only.
# Since pandas 2.0, DataFrame.corr() no longer drops non-numeric columns
# silently and raises if the frame contains one (e.g. a text label column).
# Selecting the numeric columns first gives the same table on every pandas
# version.
df.select_dtypes(include="number").corr()
Out[7]:
In [11]:
# Scatter plot of Mortality (y-axis) against Exposure (x-axis).
df.plot.scatter(x='Exposure', y='Mortality')
Out[11]:
In [16]:
# Ordinary least squares with Mortality as the response and Exposure as the
# single predictor; build the model first, then fit it.
ols_model = smf.ols(formula="Mortality~Exposure", data=df)
lm = ols_model.fit()
In [17]:
# Show the fitted coefficients of the regression held in `lm`.
lm.params
Out[17]:
In [19]:
# Fetch each coefficient by its label instead of unpacking by position, so
# the two values cannot be swapped silently if the order of lm.params ever
# differs. The formula API labels the constant term "Intercept" and the
# predictor's coefficient after its column name.
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [20]:
# Scatter of the observations with the fitted regression line on top.
# Keep the Axes returned by the scatter call and draw the line on it
# explicitly, rather than relying on pyplot's implicit "current axes".
ax = df.plot(kind="scatter", x="Exposure", y="Mortality")
ax.set_title("Mortality vs. Exposure with fitted OLS line")
ax.plot(df["Exposure"], slope * df["Exposure"] + intercept, "-", color="red")
Out[20]:
In [21]:
# Evaluate the fitted line at an Exposure value of 10.
exposure_value = 10
mortality_rate = intercept + exposure_value * slope
In [22]:
# Display the value computed from the fitted line for an Exposure of 10.
mortality_rate
Out[22]:
In [ ]: