In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf
In [2]:
# Load hanford.csv into a DataFrame; the path is relative to the notebook's
# working directory, so the file must sit next to the notebook (or the kernel
# must be started from that folder).
df = pd.read_csv("hanford.csv")
In [3]:
# Column-wise mean. numeric_only=True makes the restriction to numeric columns
# explicit: pandas >= 2.0 no longer drops non-numeric columns silently and
# raises TypeError instead if the CSV contains any text column.
df.mean(numeric_only=True)
Out[3]:
In [4]:
# Column-wise median. numeric_only=True keeps this working on pandas >= 2.0,
# which raises TypeError on non-numeric columns instead of skipping them.
df.median(numeric_only=True)
Out[4]:
In [5]:
# Interquartile range per column: Q3 - Q1.
# numeric_only=True is passed explicitly because pandas >= 2.0 changed the
# default to False, which fails if the frame holds any non-numeric column.
iqr = df.quantile(q=0.75, numeric_only=True) - df.quantile(q=0.25, numeric_only=True)
iqr
Out[5]:
In [6]:
# Upper limit per column: Q3 + 1.5 * IQR (uses `iqr` from the previous cell).
# numeric_only=True keeps the quantile call valid on pandas >= 2.0 when the
# frame contains non-numeric columns.
UAL = (iqr * 1.5) + df.quantile(q=0.75, numeric_only=True)
UAL
Out[6]:
In [8]:
# Lower limit per column: Q1 - 1.5 * IQR (uses `iqr` from an earlier cell).
# numeric_only=True keeps the quantile call valid on pandas >= 2.0 when the
# frame contains non-numeric columns.
LAL = df.quantile(q=0.25, numeric_only=True) - (iqr * 1.5)
LAL
Out[8]:
In [9]:
# Pairwise Pearson correlation between the numeric columns.
# Non-numeric columns are filtered out explicitly: pandas >= 2.0 no longer
# drops them automatically in corr(), and select_dtypes works on every
# pandas version (the numeric_only keyword of corr() only exists from 1.5).
df.select_dtypes(include="number").corr()
Out[9]:
In [10]:
# Show the whole DataFrame for a visual sanity check of the loaded data.
df
Out[10]:
In [16]:
# Scatter plot of the raw data. Exposure is the predictor in the regression
# fitted in this notebook (Mortality ~ Exposure), so it goes on the x-axis and
# Mortality (the response) on the y-axis; the original cell had them swapped.
ax = df.plot(kind='scatter', x='Exposure', y='Mortality', color='green', figsize=(7, 5))
ax
Out[16]:
In [17]:
# Fit an ordinary least squares model with Mortality as the response and
# Exposure as the single predictor; `lm` holds the fitted results object.
lm = smf.ols(
    data=df,
    formula="Mortality ~ Exposure",
).fit()
In [20]:
# Estimated coefficients of the fitted model (intercept and Exposure slope).
lm.params
Out[20]:
In [21]:
def mortality_rate_calculator(exposure, intercept=114.715631, slope=9.231456):
    """Predict the mortality rate for a given exposure with a linear model.

    Computes ``intercept + slope * exposure``.

    Parameters
    ----------
    exposure : number or numeric string
        Exposure value; converted with ``float()`` before use.
    intercept : float, optional
        Intercept of the regression line. The default is the value that was
        hard-coded in the original cell (presumably copied from ``lm.params``;
        re-check it if the data or the model changes).
    slope : float, optional
        Slope of the regression line. Same caveat as ``intercept``.

    Returns
    -------
    float
        The predicted mortality rate.

    Raises
    ------
    ValueError, TypeError
        If ``exposure`` cannot be converted to ``float``.
    """
    return intercept + (slope * float(exposure))
In [23]:
# Add a column with the model's prediction for every row: the calculator is
# applied element-wise to the Exposure values. The frame is then displayed.
df['predicted_mortality_rate'] = df['Exposure'].map(mortality_rate_calculator)
df
Out[23]:
In [26]:
# Pull the fitted coefficients out of the model by label rather than by
# position, so the assignment cannot silently swap values if the order of
# the parameters ever changes. With the formula "Mortality ~ Exposure" the
# constant term is labelled "Intercept" and the slope "Exposure".
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [ ]:
# TODO: don't know what happened here - needs investigating.
In [31]:
# Observed data with the fitted regression line on top.
# The line is y = slope * Exposure + intercept, i.e. x is Exposure and y is
# Mortality. The scatter must use the same orientation, otherwise the points
# and the line live in different coordinate spaces and never overlap (the
# original cell plotted the scatter with x='Mortality', y='Exposure').
ax_fit = df.plot(kind='scatter', x='Exposure', y='Mortality', color='green', figsize=(7, 5))
# Draw on the Axes returned by pandas instead of relying on pyplot's implicit
# "current axes" state.
ax_fit.plot(df["Exposure"], slope * df["Exposure"] + intercept, "-", color="red")
Out[31]:
In [ ]: