In [21]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
import statsmodels.formula.api as smf
In [9]:
# Load hanford.csv (path is relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="hanford.csv")
In [43]:
# Only the last expression of a cell is rendered, so a bare df.describe()
# on a non-final line is computed and then thrown away. Show it explicitly
# with display(); the correlation matrix stays the cell's output value.
# NOTE(review): if the CSV contains non-numeric columns, pandas >= 2.0 needs
# df.corr(numeric_only=True) here -- confirm against the data and pandas version.
display(df.describe())
df.corr()
Out[43]:
In [32]:
# "High exposure" threshold, defined here as 1.5 x IQR of Exposure, where
# IQR = Q3 - Q1. The quartiles are the values quoted in the original note
# (presumably read off df['Exposure'].describe() -- confirm against the data).
# NOTE(review): the conventional Tukey outlier fence is Q3 + 1.5*IQR rather
# than 1.5*IQR on its own; confirm which definition is intended.
exposure_q1 = 2.49
exposure_q3 = 6.41
# 6.41 - 2.49 = 3.92; the previously hard-coded 4.08 did not match these quartiles.
exposure_iqr = exposure_q3 - exposure_q1
high_exposure = exposure_iqr * 1.5
In [33]:
# Summary statistics for the Exposure column alone.
df.loc[:, 'Exposure'].describe()
Out[33]:
In [ ]:
In [ ]:
In [40]:
# Ordinary least squares fit. The formula regresses Y on X (Y ~ X), so
# Mortality is the response and Exposure is the predictor.
lm = (
    smf.ols(formula="Mortality~Exposure", data=df)
    .fit()
)
In [41]:
# Look the coefficients up by label instead of unpacking by position, so the
# names stay correct even if terms are later added to or reordered in the
# formula. The formula API labels the constant term "Intercept" and the
# predictor by its column name.
intercept = lm.params["Intercept"]
slope = lm.params["Exposure"]
In [42]:
# Show the fitted coefficients of `lm` as the cell output.
lm.params
Out[42]:
In [ ]:
# Fitted line: y = m*x + b, i.e. Mortality = slope * Exposure + intercept
In [ ]: