In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [2]:
# Load hanford.csv; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="hanford.csv")
In [3]:
# Summary statistics of the loaded frame, shown as the cell output.
df.describe()
Out[3]:
In [4]:
# Pairwise correlations between the numeric columns only.
# Selecting them explicitly keeps this cell working on pandas >= 2.0, where
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# instead if the CSV carries any (e.g. a label column).
df.select_dtypes(include="number").corr()
Out[4]:
In [5]:
# Scatter plot of Mortality (y) against Exposure (x).
df.plot.scatter(x="Exposure", y="Mortality")
Out[5]:
In [6]:
# Ordinary least squares fit of Mortality on Exposure.
lm = smf.ols(
    formula="Mortality~Exposure",
    data=df,
).fit()
In [7]:
# Show the fitted coefficients of the OLS model.
lm.params
Out[7]:
In [8]:
# Pull the two fitted coefficients out by label instead of relying on the
# order of the params Series ("Intercept" is the name the formula interface
# gives the constant term; "Exposure" is the single regressor).
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [9]:
# Scatter of the observations with the fitted regression line on top.
# The line must be evaluated at the predictor (Exposure), not at the response:
# the model is Mortality = intercept + slope * Exposure.
ax = df.plot(kind="scatter", x="Exposure", y="Mortality")
ax.plot(df["Exposure"], slope * df["Exposure"] + intercept, "-", color="red")
Out[9]:
In [10]:
# Full regression report for the fitted model.
lm.summary()
Out[10]:
In [13]:
# Pearson correlation between Exposure and Mortality, computed from the data
# so the value stays correct if the CSV changes (it was previously typed in
# by hand as 0.858).
# NOTE(review): the old literal looks like the R-squared reported by
# lm.summary() rather than r itself - confirm; R-squared is lm.rsquared.
coefficient_of_correlation = df["Exposure"].corr(df["Mortality"])
coefficient_of_correlation
Out[13]:
In [14]:
def predicting_morality_rate(exposure):
    """Evaluate the fitted regression line at ``exposure``.

    Uses the notebook-level ``slope`` and ``intercept`` taken from the
    fitted model.

    Args:
        exposure: Value to predict for; anything ``float()`` accepts.

    Returns:
        ``float(exposure) * slope + intercept``.

    Raises:
        ValueError, TypeError: If ``exposure`` cannot be converted by ``float()``.
    """
    exposure_value = float(exposure)
    return exposure_value * slope + intercept
In [15]:
# Example: value of the fitted line at an exposure of 10.
predicting_morality_rate(10)
Out[15]:
In [ ]: