In [51]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
In [52]:
# Read data/hanford.csv into the DataFrame used by the rest of the notebook.
df = pd.read_csv('data/hanford.csv')
In [53]:
# Preview the first rows of the freshly loaded frame.
df.head()
Out[53]:
In [54]:
# Cast Mortality to float in a single vectorised step rather than converting
# element by element in a Python-level loop.
df['Mortality'] = df['Mortality'].astype(float)
In [55]:
# Summary statistics for df after the Mortality cast.
df.describe()
Out[55]:
In [56]:
# Structural overview of df (columns and their types).
df.info()
Step 01. Prepare the features: add a binary high/low exposure classification column.
In [57]:
def high_exposure(x, threshold=6.41):
    """Classify an exposure value as high (1) or not high (0).

    Parameters
    ----------
    x : number
        Exposure value to classify.
    threshold : number, optional
        Cut-off above which exposure counts as high. Defaults to 6.41, the
        value that was previously hard-coded here; how that figure was
        derived is not shown in this notebook.

    Returns
    -------
    int
        1 if ``x`` is strictly greater than ``threshold``, otherwise 0.
        A value equal to the threshold, or NaN, yields 0.
    """
    return 1 if x > threshold else 0
In [58]:
# Label every row 1 or 0 by passing its Exposure value through high_exposure.
df['Exposure_classification'] = df['Exposure'].map(high_exposure)
In [59]:
# Re-inspect df now that the Exposure_classification column has been added.
df.head()
Out[59]:
In [ ]:
In [ ]:
In [60]:
from sklearn.linear_model import LogisticRegression
In [61]:
# Constructed with no arguments, so every hyperparameter is left at the
# library's default.
lm = LogisticRegression()
In [73]:
# Mortality is selected with a list of columns so the feature array is 2-D
# (one row per record, one column); the target labels stay 1-D.
x = df[['Mortality']].values
y = df['Exposure_classification'].values
In [74]:
# Inspect the feature array built from the Mortality column.
x
Out[74]:
In [75]:
# Inspect the target array built from the Exposure_classification column.
y
Out[75]:
In [ ]:
In [76]:
# Fit on the full dataset (no train/test split is made in this notebook);
# the value returned by fit is rebound to lm.
lm = lm.fit(x,y)
In [77]:
# Scored on the same x, y the model was fitted on, so this is a training-set
# figure rather than a held-out estimate.
lm.score(x,y)
Out[77]:
In [78]:
# Fitted coefficient(s); x has a single column (Mortality).
lm.coef_
Out[78]:
In [79]:
# Fitted intercept term of the model.
lm.intercept_
Out[79]:
In [80]:
# predict needs a 2-D array of shape (n_samples, n_features). The model was
# fitted on one feature (Mortality), so each value is placed in its own row;
# a flat list such as [0, 0, 1] is rejected.
lm.predict([[0], [0], [1]])
In [81]:
# predict needs a 2-D array of shape (n_samples, n_features). The model was
# fitted on one feature (Mortality), so each value is placed in its own row;
# a flat list such as [0, 0, 1] is rejected.
lm.predict([[0], [0], [1]])
In [ ]: