In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.formula.api as smf
In [2]:
# Load the Hanford CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that exact file exists — consider a configurable data directory.
df = pd.read_csv(
    filepath_or_buffer='/home/sean/git/algorithms/class6/data/hanford.csv',
)
In [15]:
# Display the loaded DataFrame as this cell's output.
df
Out[15]:
In [3]:
# Show pandas' descriptive summary of the DataFrame's columns.
df.describe()
Out[3]:
In [22]:
# Ordinary least squares: regress Mortality on Exposure using the rows of df,
# and keep the fitted results object for the cells that follow.
lm = smf.ols(
    data=df,
    formula="Mortality~Exposure",
).fit()
In [23]:
# Show the fitted model's parameter estimates.
lm.params
Out[23]:
In [28]:
# Pull the two fitted coefficients out of the params Series by label:
# "Intercept" is the constant term, "Exposure" is the regressor's coefficient.
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [30]:
# Scatter the observed (Exposure, Mortality) points, then draw the fitted
# regression line in red on the same axes.
df.plot.scatter(x="Exposure", y="Mortality", linewidths=0)
plt.plot(
    df["Exposure"],
    intercept + slope * df["Exposure"],
    "-",
    color="red",
)
Out[30]:
In [31]:
# Coefficient of determination of the fitted model.
# The squared slope is NOT R^2 (the slope depends on the units of the
# variables); read R^2 from the fitted results object instead.
r_squared = lm.rsquared
In [32]:
# Ask for an exposure value and report the mortality predicted by the fitted line.
# input() returns a string in Python 3, so convert it to a number before doing
# arithmetic with the slope; a non-numeric entry raises ValueError.
exposure = float(input("Enter county exposure:"))
print("Expected mortality rate:", (slope * exposure) + intercept)
In [ ]: