In [19]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
In [25]:
# Read the CSV into a DataFrame, then show its column labels
# as the cell's output.
csv_path = 'hanford.csv'
df = pd.read_csv(csv_path)
df.columns
Out[25]:
In [26]:
# Peek at the first two rows of the frame.
df.head(n=2)
Out[26]:
In [21]:
# Summary statistics for the columns of df.
df.describe()
Out[21]:
In [43]:
# Histogram of the Mortality column in 5 bins. Series.hist returns the
# matplotlib Axes, which is used to label the figure so that it can be read
# on its own; the Axes is still the cell's output value.
ax = df['Mortality'].hist(bins=5)
ax.set(title='Distribution of Mortality', xlabel='Mortality', ylabel='Number of rows')
ax
Out[43]:
In [46]:
# Mean of the Mortality column.
df['Mortality'].mean()
Out[46]:
In [50]:
# Cut-offs used to turn the two numeric columns into 0/1 indicators.
# NOTE(review): the origin of these values is not shown here (they look like
# statistics read off df.describe()) -- TODO confirm and derive them from the
# data instead of hard-coding.
MORTALITY_CUTOFF = 147.1
EXPOSURE_CUTOFF = 3.41

# Vectorised comparison instead of a row-by-row apply(lambda ...):
# 1 when the value is >= the cut-off, 0 otherwise (missing values give 0,
# exactly as the lambda version did). int64 matches the dtype apply() produced.
df['Mort_high'] = (df['Mortality'] >= MORTALITY_CUTOFF).astype('int64')
df['Expo_high'] = (df['Exposure'] >= EXPOSURE_CUTOFF).astype('int64')
In [51]:
# Display the whole frame, including the Mort_high / Expo_high columns.
# NOTE(review): this prints every row; prefer df.head() if the data set is large.
df
Out[51]:
In [54]:
# Create an unfitted logistic-regression classifier with default settings.
lm = LogisticRegression()
In [64]:
# Feature matrix: the double brackets select a one-column DataFrame, so x is
# 2-D with shape (n_rows, 1). Target: a 1-D array of the Mort_high labels.
x = df[['Expo_high']].values
y = df['Mort_high'].values
In [65]:
# Fit the classifier on the binary exposure feature; fit() returns the
# estimator itself, so lm keeps pointing at the same (now fitted) object.
lm = lm.fit(X=x, y=y)
In [66]:
# Predict the mortality class for one exposure value.
# Two things matter here:
#   * the model was fitted on the binary Expo_high indicator, not on raw
#     exposure, so the raw value is binarised with the same 3.41 cut-off
#     used when Expo_high was built;
#   * predict() expects a 2-D array of shape (n_samples, n_features); a flat
#     list such as [50] is rejected by current scikit-learn releases.
exposure = 50
expo_high = 1 if exposure >= 3.41 else 0
lm.predict(np.array([[expo_high]]))
Out[66]: