In [40]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
In [41]:
# Load the Hanford dataset from the repository's data directory.
hanford_csv = '../data/hanford.csv'
df = pd.read_csv(hanford_csv)
In [42]:
# Summary statistics for the loaded frame (rendered as this cell's output).
df.describe()
Out[42]:
In [43]:
# Interquartile range (Q3 - Q1) of every numeric column.
# numeric_only=True is passed explicitly: since pandas 2.0 the default is False,
# so a text column in the frame would make quantile() raise a TypeError.
# (Presumably the CSV has such a column, e.g. a county name — confirm.)
q1 = df.quantile(q=0.25, numeric_only=True)
q3 = df.quantile(q=0.75, numeric_only=True)
iqr = q3 - q1
iqr
Out[43]:
In [44]:
# Box plot of the Exposure column; return_type='axes' asks pandas to return
# the matplotlib Axes the plot was drawn on.
df.boxplot(column='Exposure', return_type='axes')
Out[44]:
In [45]:
# Cut-off used to select "high" exposure rows: the median of the Exposure column.
threshold = df['Exposure'].median()
In [46]:
# Keep only the rows whose Exposure is at or above the threshold,
# then show how many rows/columns survived the filter.
is_high = df['Exposure'] >= threshold
df_high = df.loc[is_high]
df_high.shape
Out[46]:
In [47]:
# Display the rows selected into df_high.
df_high
Out[47]:
In [ ]:
In [61]:
# Create an unfitted scikit-learn logistic-regression estimator with default settings.
# Training data is supplied later through fit(), not through the constructor
# (an earlier attempt, `lr = LogisticRegression(df_high)`, was abandoned).
# Note that the estimator is bound to the name `lm`.
lm = LogisticRegression()
In [72]:
# Build NumPy inputs from the high-exposure subset.
feature_cols = ['Exposure']
target_col = 'Mortality'

# Both columns as one 2-D array: column 0 is Mortality, column 1 is Exposure.
# (Slicing this array — data[:, 1:] and data[:, 0] — was an earlier way of
# obtaining the same feature/target split.)
data = np.asarray(df_high[[target_col] + feature_cols])

# Feature matrix: selecting with a list keeps Exposure two-dimensional (one column).
x = np.asarray(df_high[feature_cols])
# Target vector: selecting a single label gives a 1-D array.
y = np.asarray(df_high[target_col])

# NOTE(review): fitting is still disabled (it was written as `lr.fit(x, y)`, but the
# estimator is named `lm`). Presumably Mortality must first be recoded into classes
# for a classifier to accept it — confirm before enabling.
x, y
Out[72]:
In [ ]:
In [64]:
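# TODO: once the model is fitted, predict for an exposure of 50, e.g. lm.predict([[50]])
# (originally written as lr.predict(50); the estimator is named `lm`, and predict expects a 2-D array).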
In [ ]: