In [1]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
In [4]:
# Read the Hanford CSV (path is relative to the notebook's working directory).
hanford_csv = '../data/hanford.csv'
df = pd.read_csv(hanford_csv)
In [6]:
# Pairwise correlation matrix of the columns (Pearson is the pandas default, spelled out here).
df.corr(method='pearson')
Out[6]:
In [13]:
# Summary statistics per column; the percentiles listed are the pandas defaults, made explicit.
df.describe(percentiles=[0.25, 0.5, 0.75])
Out[13]:
In [9]:
# First quartile (25th percentile) of the Exposure column.
exposure = df['Exposure']
Q1 = exposure.quantile(0.25)
Q1
Out[9]:
In [10]:
# Second quartile (median) of the Exposure column.
exposure = df['Exposure']
Q2 = exposure.quantile(0.5)
Q2
Out[10]:
In [11]:
# Third quartile (75th percentile) of the Exposure column.
exposure = df['Exposure']
Q3 = exposure.quantile(0.75)
Q3
Out[11]:
In [14]:
# Histogram of the Mortality column, grouped into 5 bins.
mortality = df['Mortality']
mortality.hist(bins=5)
Out[14]:
In [15]:
# Arithmetic mean of the Mortality column, displayed as the cell result.
mortality_mean = df['Mortality'].mean()
mortality_mean
Out[15]:
In [17]:
# Cut-offs used to turn the continuous columns into binary "high" flags.
# NOTE(review): presumably derived from the Mortality mean and the Exposure
# median computed in earlier cells — confirm before relying on them.
MORTALITY_CUTOFF = 157.1
EXPOSURE_CUTOFF = 3.41

# Vectorized comparison instead of a row-wise apply: 1 where the value is at or
# above the cut-off, 0 otherwise (missing values compare False and become 0).
df['Mort_high'] = (df['Mortality'] >= MORTALITY_CUTOFF).astype('int64')
df['Expo_high'] = (df['Exposure'] >= EXPOSURE_CUTOFF).astype('int64')
In [18]:
# Display the frame, which now carries the binary Mort_high / Expo_high columns.
# Plan noted by the author: use logistic regression to classify rows into
# high vs. low mortality.
df
Out[18]:
In [19]:
# Unfitted logistic-regression classifier (LogisticRegression is imported in the
# first cell); it is fitted in a later cell.
lm = LogisticRegression()
In [ ]:
In [ ]:
# Define the training data in this cell so it runs on a fresh kernel.
# NOTE(review): feature = raw Exposure, target = binary Mort_high is assumed from
# the "high and low mortality are classified" note — confirm the intended pairing.
# The feature is kept 2-D (n_samples, 1) because scikit-learn requires that shape;
# plain arrays are used so later predictions can be made from nested lists.
x = df[['Exposure']].to_numpy()
y = df['Mort_high'].to_numpy()
lm = lm.fit(x, y)
In [ ]:
# Predict the class for a single observation whose feature value is 50.
# scikit-learn expects a 2-D input of shape (n_samples, n_features), hence the
# nested list: one sample with one feature.
lm.predict([[50]])
In [ ]: