In [15]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
In [16]:
# Load the Hanford dataset into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv("../data/hanford.csv")
In [17]:
# Summary statistics for the columns of the loaded frame.
df.describe()
Out[17]:
In [5]:
# Histogram of the Mortality column in 5 bins. Series.hist returns the
# matplotlib Axes it drew on; keep it so the figure can carry a title and
# axis labels and still make sense when the notebook is skimmed.
ax = df['Mortality'].hist(bins=5)
ax.set_title('Distribution of Mortality')
ax.set_xlabel('Mortality')
# The trailing ';' keeps the cell from echoing the Text object's repr.
ax.set_ylabel('Number of rows');
Out[5]:
In [6]:
# Arithmetic mean of the Mortality column.
df['Mortality'].mean()
Out[6]:
In [18]:
# Binary indicator columns: 1 where the value is at or above the cutoff,
# 0 otherwise (a missing value compares False and therefore maps to 0).
# NOTE(review): 147.1 and 3.41 are hard-coded cutoffs; presumably they are
# summary statistics of the respective columns -- confirm their origin.
mortality_at_or_above = df['Mortality'] >= 147.1
exposure_at_or_above = df['Exposure'] >= 3.41
df['Mort_high'] = mortality_at_or_above.astype('int64')
df['Expo_high'] = exposure_at_or_above.astype('int64')
In [20]:
def exposure_high(x, threshold=3.41):
    """Classify an exposure value as high (1) or not high (0).

    Parameters
    ----------
    x : number
        The exposure value to classify.
    threshold : number, optional
        Cutoff at or above which a value counts as high. Defaults to 3.41,
        the cutoff that was previously hard-coded in this function.

    Returns
    -------
    int
        1 if ``x >= threshold``, otherwise 0. NaN compares False and so
        yields 0.
    """
    return 1 if x >= threshold else 0
In [19]:
# Display the frame to inspect the indicator columns added to it.
# NOTE(review): this renders every row; if the dataset is large, prefer
# df.head() -- confirm the row count before changing.
df
Out[19]:
In [10]:
# Logistic-regression classifier created with the library's default settings
# (no arguments are passed to the constructor).
lm = LogisticRegression()
In [25]:
# Feature matrix: selecting with a list of column names keeps the result
# two-dimensional (one row per record, a single 'Exposure' column).
exposure_frame = df[['Exposure']]
x = exposure_frame.values
# Target vector: the one-dimensional 0/1 'Mort_high' indicator.
mort_high_series = df['Mort_high']
y = mort_high_series.values
In [26]:
# Fit the classifier on the exposure feature (x) against the high-mortality
# label (y). fit returns the fitted estimator, which is bound back to lm.
lm = lm.fit(X=x, y=y)
In [27]:
# Predict the high-mortality class for a single record with Exposure = 50.
# predict expects a 2-D array-like of shape (n_samples, n_features); a bare
# 1-D list such as [50] is rejected by current scikit-learn, so the sample
# is wrapped as one row with one feature.
lm.predict([[50]])
Out[27]:
In [ ]: