In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'fivethirtyeight' style sheet to the plots drawn below.
plt.style.use('fivethirtyeight')
In [14]:
# Load the Hanford CSV into a DataFrame.
# NOTE(review): the path points into one user's home directory; adjust it
# when running this notebook on another machine.
df = pd.read_csv(
    filepath_or_buffer="~/Documents/LEDE/algorithms/class6/data/hanford.csv",
)
In [17]:
# Display the loaded DataFrame as this cell's output.
df
Out[17]:
In [15]:
# Summary statistics of the DataFrame, shown as this cell's output.
df.describe()
Out[15]:
In [22]:
# Pairwise correlations between the numeric columns. The numeric/bool columns
# are selected explicitly because, from pandas 2.0 on, DataFrame.corr() no
# longer drops non-numeric columns silently and raises if any are present
# (e.g. a text label column). select_dtypes works on old and new pandas alike.
r = df.select_dtypes(include=['number', 'bool']).corr()
# Squaring each correlation coefficient gives the pairwise r-squared.
r**2
Out[22]:
In [16]:
# Scatter plot of Mortality (y axis) against Exposure (x axis).
df.plot.scatter(x='Exposure', y='Mortality')
Out[16]:
In [23]:
# Fit an ordinary least squares model of Mortality on Exposure using the
# statsmodels formula interface; `lm` holds the fitted results.
lm = smf.ols(
    data=df,
    formula='Mortality~Exposure',
).fit()
In [24]:
# Show the fitted coefficients of the model as this cell's output.
lm.params
Out[24]:
In [25]:
# Pull the coefficients out by label rather than by position, so the two
# values cannot be silently swapped if the order of lm.params ever differs.
# 'Intercept' is the name the formula API gives the constant term; 'Exposure'
# is the predictor from the 'Mortality~Exposure' formula.
intercept = lm.params['Intercept']
slope = lm.params['Exposure']
In [26]:
# Scatter of the observed data, then the fitted regression line
# (intercept + slope * Exposure) drawn over the same axes.
df.plot.scatter(x='Exposure', y='Mortality')
plt.plot(df['Exposure'], intercept + slope*df['Exposure'], "-")
Out[26]:
In [27]:
def mortality_predictor(exposure):
    """Predict Mortality for a given Exposure from the fitted line.

    Uses the module-level ``slope`` and ``intercept`` values, so those must
    be defined before this function is called.

    Parameters
    ----------
    exposure : anything accepted by ``float()``
        The exposure value to predict for (number or numeric string).

    Returns
    -------
    The value ``slope * float(exposure) + intercept``.

    Raises
    ------
    ValueError, TypeError
        If ``exposure`` cannot be converted with ``float()``.
    """
    return slope*float(exposure)+intercept
In [29]:
mortality_predictor(10)
Out[29]:
In [ ]: