In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf
In [7]:
cd C:\Users\Harsha Devulapalli\Desktop\algorithms\class6
In [8]:
df=pd.read_csv("data/hanford.csv")
In [10]:
# Summary statistics of the loaded data, as a first sanity check.
df.describe()
Out[10]:
In [11]:
# Pairwise Pearson correlations between the numeric columns only. Selecting
# them explicitly keeps this cell working on pandas >= 2.0, where corr() no
# longer silently drops non-numeric columns.
df.select_dtypes(include="number").corr()
Out[11]:
In [15]:
# Scatter plot of the raw data: exposure on the x-axis, mortality on the y-axis.
df.plot(
    kind='scatter',
    x='Exposure',
    y='Mortality',
)
Out[15]:
In [16]:
# Ordinary least squares fit of Mortality as a linear function of Exposure.
lm = smf.ols(
    formula="Mortality~Exposure",
    data=df,
).fit()
In [18]:
# Fitted coefficients of the regression (one entry per model term).
lm.params
Out[18]:
In [19]:
# Look the coefficients up by label rather than by position, so the two
# values cannot be silently swapped if the order of lm.params ever differs.
# "Intercept" is the label the formula API gives the constant term.
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [ ]:
In [22]:
# Mortality predicted by the fitted line at every observed exposure value.
fitted_mortality = intercept + slope * df["Exposure"]

# Scatter of the observations, then the regression line drawn on the same axes.
df.plot(kind="scatter", x="Exposure", y="Mortality")
plt.plot(df["Exposure"], fitted_mortality, "-", color="red")
Out[22]:
In [26]:
# Coefficient of determination for the one-predictor fit: the squared Pearson
# correlation between the two columns. Correlating the two Series directly
# avoids building the full correlation matrix and is unaffected by any other
# (possibly non-numeric) columns in df.
r = df["Exposure"].corr(df["Mortality"])
r ** 2
Out[26]:
In [23]:
def predictor(exposure):
    """Predict mortality for a given exposure using the fitted line.

    Parameters
    ----------
    exposure : anything accepted by ``float()``
        Exposure value to evaluate (a number or a numeric string).

    Returns
    -------
    ``intercept + float(exposure) * slope``, where ``intercept`` and
    ``slope`` are the module-level regression coefficients.

    Raises
    ------
    ValueError, TypeError
        If ``exposure`` cannot be converted by ``float()``.
    """
    return intercept + float(exposure) * slope
In [24]:
# Example: predicted mortality for an exposure value of 10.
predictor(10)
Out[24]:
In [ ]: