In [1]:
import pandas as pd
%matplotlib inline
import numpy as np
from sklearn.linear_model import LogisticRegression
In [3]:
# Load the CSV that sits next to this notebook into the working DataFrame.
df = pd.read_csv("hanford.csv")
In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for df's numeric columns.
df.describe()
Out[4]:
In [5]:
# Pairwise correlation between the numeric columns only.
# Selecting numeric dtypes explicitly keeps this cell working on pandas >= 2.0,
# where DataFrame.corr() no longer silently drops non-numeric columns and
# raises if the frame contains any text column.
df.select_dtypes(include="number").corr()
Out[5]:
In [8]:
# Histogram of the Mortality column. The axes are labelled so the figure can be
# read on its own; the trailing semicolon hides the repr of the set() result.
ax = df["Mortality"].hist()
ax.set(title="Distribution of Mortality", xlabel="Mortality", ylabel="Number of rows");
Out[8]:
In [9]:
# Arithmetic mean of the Mortality column, shown as the cell output.
df["Mortality"].mean()
Out[9]:
In [13]:
# Use each column's median as the threshold for a binary "high" flag.
# The cutoffs used to be hard-coded (147.1 for Mortality, 3.41 for Exposure);
# computing them from the data keeps the flags consistent if the CSV changes.
# NOTE(review): assumes those literals were the medians of this dataset — confirm.
mortality_median = df["Mortality"].median()
exposure_median = df["Exposure"].median()

# Vectorised comparison: 1 where the value is at or above the median, else 0.
# Missing values compare as False and therefore map to 0, as with the old lambda.
df["Mort_high"] = (df["Mortality"] >= mortality_median).astype(int)
df["Expo_high"] = (df["Exposure"] >= exposure_median).astype(int)
In [14]:
def exposure_high(x, threshold=3.41):
    """Return 1 if ``x`` is at or above ``threshold``, otherwise 0.

    Parameters
    ----------
    x : number
        The value to classify.
    threshold : number, optional
        Cutoff for the "high" class. Defaults to 3.41, the value that was
        previously hard-coded, so existing one-argument calls are unaffected.

    Returns
    -------
    int
        1 when ``x >= threshold``, else 0 (NaN compares False and yields 0).
    """
    return 1 if x >= threshold else 0
In [15]:
# Display the frame to inspect the Mort_high / Expo_high columns added above.
df
Out[15]:
In [7]:
# First quartile (25th percentile) of the Exposure column; echoed as cell output.
Q1 = df["Exposure"].quantile(0.25)
Q1
Out[7]:
In [ ]:
# Second quartile (50th percentile, i.e. the median) of the Exposure column.
# The previous q=0. was a truncated literal equal to 0.0, which returns the
# column minimum rather than the second quartile.
Q2 = df["Exposure"].quantile(q=0.5)
Q2
In [ ]:
In [ ]:
In [16]:
# Create the (unfitted) logistic-regression estimator with scikit-learn's
# default settings. The original cell was an incomplete assignment.
lm = LogisticRegression()
In [ ]:
# fit() takes the feature matrix and the target as two separate arguments;
# the original `x.y` was an attribute access, not an argument list.
# NOTE(review): x and y were never defined in this notebook. Assumed here:
# Exposure as the single predictor and the binary Mort_high flag as the
# target — confirm this is the intended model.
x = df[["Exposure"]]  # 2-D frame: scikit-learn expects (n_samples, n_features)
y = df["Mort_high"]
lm = lm.fit(x, y)
In [ ]:
In [ ]:
In [ ]:
In [ ]: