In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.formula.api as smf
In [2]:
# Read the Hanford CSV (path is relative to the notebook's working directory)
# into `df`, then preview the top of the frame as the cell's displayed output.
df = pd.read_csv(filepath_or_buffer='../data/hanford.csv')
df.head(n=5)
Out[2]:
In [11]:
# Spread of each variable: distance between the 75th and 25th percentiles.
for column in ('Mortality', 'Exposure'):
    lower, upper = df[column].quantile(0.25), df[column].quantile(0.75)
    print('{} interquantile: '.format(column), upper - lower)

# `mode` is a method: it has to be called, otherwise the bound-method repr is
# printed instead of the most frequent value(s) of each column.
print('Mode:', df.mode())

# Count, mean, std, min, quartiles and max for the numeric columns.
df.describe()
Out[11]:
In [4]:
# Pearson correlation between the two columns of interest. Computing it on the
# two Series directly avoids building the full correlation matrix and does not
# depend on every column of `df` being numeric.
exposure_mortality_corr = df['Exposure'].corr(df['Mortality'])
print("The coefficient is {}. It seems worthy of investigation.".format(exposure_mortality_corr))
In [5]:
# Ordinary least squares with Mortality as the response and Exposure as the
# single predictor; `lm` holds the fitted results object.
lm = smf.ols('Mortality~Exposure', df).fit()

# Display the estimated coefficients as the cell output.
lm.params
Out[5]:
In [6]:
# Scatter of the observations with the fitted regression line drawn on top.
fig, ax = plt.subplots()
ax.plot(df['Exposure'], df['Mortality'], 'o', label="Data")
ax.plot(df['Exposure'], lm.fittedvalues, '-', color='red', label="Prediction")

# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel('Exposure')
ax.set_ylabel('Mortality')
ax.set_title('Mortality vs. Exposure with OLS fit')

# The `label=` arguments above are only rendered once a legend is requested.
# The trailing semicolon suppresses the Legend repr in the cell output.
ax.legend();
Out[6]:
In [7]:
# Exposure value at which to evaluate the fitted line.
exposure_level = 10

# Look the coefficients up by term name rather than by position, so the
# calculation does not depend on the order of entries in `lm.params`.
intercept = lm.params['Intercept']
slope = lm.params['Exposure']

# Point prediction from the fitted line: intercept + slope * exposure.
result = slope*exposure_level + intercept
print("The result is {}.".format(result))