In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression

%matplotlib inline
In [5]:
# Read hanford.csv from the working directory and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="hanford.csv")
df.head(n=5)
Out[5]:
In [6]:
# Summary statistics for the columns of df.
df.describe()
Out[6]:
In [7]:
# Column-wise medians. numeric_only=True limits the calculation to numeric
# columns: since pandas 2.0 the default no longer skips non-numeric columns,
# so a text column in the CSV (if there is one) would raise a TypeError.
df.median(numeric_only=True)
Out[7]:
In [8]:
# Range of Mortality: largest observed value minus the smallest.
rang = (
    df['Mortality'].max()
    - df['Mortality'].min()
)
rang
Out[8]:
In [10]:
# Interquartile range of Mortality: third quartile minus first quartile.
iqr_m = (
    df['Mortality'].quantile(0.75)
    - df['Mortality'].quantile(0.25)
)
iqr_m
Out[10]:
In [11]:
# Interquartile range of Exposure: third quartile minus first quartile.
iqr_e = (
    df['Exposure'].quantile(0.75)
    - df['Exposure'].quantile(0.25)
)
iqr_e
Out[11]:
In [12]:
# Upper fence for Mortality: third quartile plus 1.5 times the Mortality IQR.
UAL_m = df['Mortality'].quantile(0.75) + 1.5 * iqr_m
UAL_m
Out[12]:
In [17]:
# Upper fence for Exposure: third quartile plus 1.5 times the Exposure IQR.
# The fence has to be built from the same column's IQR (iqr_e), not from the
# Mortality IQR.
UAL_e = df['Exposure'].quantile(q=0.75) + (iqr_e * 1.5)
UAL_e
Out[17]:
In [13]:
# Lower fence for Mortality: first quartile minus 1.5 times the Mortality IQR.
# The fence has to be built from the same column's IQR (iqr_m), not from the
# Exposure IQR.
LAL_m = df['Mortality'].quantile(q=0.25) - (iqr_m * 1.5)
LAL_m
Out[13]:
In [14]:
# Lower fence for Exposure: first quartile minus 1.5 times the Exposure IQR.
LAL_e = df['Exposure'].quantile(0.25) - 1.5 * iqr_e
LAL_e
Out[14]:
In [15]:
# Number of rows whose Mortality lies above the upper fence UAL_m.
int((df['Mortality'] > UAL_m).sum())
Out[15]:
In [18]:
# Number of rows whose Exposure lies above the upper fence UAL_e.
int((df['Exposure'] > UAL_e).sum())
Out[18]:
In [19]:
# Number of rows whose Mortality lies below the lower fence LAL_m.
int((df['Mortality'] < LAL_m).sum())
Out[19]:
In [20]:
# Number of rows whose Exposure lies below the lower fence LAL_e. Together
# with the three preceding counts this covers both fences of both columns;
# the Mortality upper-fence count is already computed in an earlier cell.
len(df[df['Exposure'] < LAL_e])
Out[20]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [41]:
# Ordinary least-squares regressor. Mortality is handled as a numeric response
# in this notebook and later cells read a slope and an intercept from the fit,
# which calls for a regressor rather than the LogisticRegression classifier.
lm = LinearRegression()
In [49]:
# Two-column array: Mortality first, Exposure second.
data = np.asarray(df[['Mortality', 'Exposure']])
# y is the 1-D Mortality column; x keeps the remaining column(s) as a 2-D slice.
y, x = data[:, 0], data[:, 1:]
In [50]:
# Show the full array holding the Mortality and Exposure columns.
data
Out[50]:
In [44]:
# Show the feature array (every column of data after the first).
x
Out[44]:
In [45]:
# Show the target array (first column of data).
y
Out[45]:
In [48]:
# Fit the estimator with x as the feature matrix and y as the target vector.
lm.fit(X=x, y=y)
In [35]:
# Coefficients learned by the fitted estimator.
lm.coef_
In [36]:
# Estimator's default score, evaluated on the same x and y that were passed
# to fit (not on held-out data).
lm.score(x,y)
In [37]:
# Keep the first entry of the fitted coefficients as the slope.
slope = lm.coef_[0]
In [38]:
# Keep the fitted intercept term.
intercept = lm.intercept_
In [40]:
# Predict for an Exposure value of 50. scikit-learn estimators expect a 2-D
# input of shape (n_samples, n_features), so the single value is wrapped as
# one row with one column; a bare scalar is rejected with a ValueError.
lm.predict([[50]])
In [ ]: