In [ ]:
import pandas as pd
%matplotlib inline
import numpy as np
In [ ]:
# Load the raw passenger data; the path is relative to the notebook's working directory.
titanic = pd.read_csv("data/titanic.csv")
In [ ]:
# Inspect the available column names before selecting features.
titanic.columns
First, we need to prepare our features. Remember that we drop one level from each set of dummy variables to avoid the dummy variable trap (perfect collinearity with the intercept).
In [ ]:
# Binary indicator: 1 where `sex` equals 'female', 0 for everything else.
# A vectorized comparison replaces the per-row Python lambda; missing values
# compare unequal and therefore still map to 0, exactly as before.
titanic['sex_female'] = titanic['sex'].eq('female').astype('int64')
In [ ]:
# Build the modelling frame: the target column, one dummy column per `pclass`
# level, and the `sex_female` indicator, aligned on the shared row index.
# dtype=int is passed explicitly because pandas >= 2.0 returns boolean dummies
# by default; mixing bool and int columns would produce an object-dtype array
# when the features are later converted with np.asarray.
pclass_dummies = pd.get_dummies(titanic['pclass'], prefix="pclass", dtype=int)
dataset = titanic[['survived']].join([pclass_dummies, titanic['sex_female']])
In [ ]:
from sklearn.linear_model import LogisticRegression
In [ ]:
# Create the classifier with library defaults.
# NOTE(review): scikit-learn's defaults include regularization, so the fitted
# coefficients are presumably not plain maximum-likelihood estimates — confirm
# this is intended.
lm = LogisticRegression()
In [ ]:
# Feature matrix and target as NumPy arrays. pclass_1st is deliberately left
# out: it acts as the reference level, which avoids the dummy variable trap.
feature_columns = ['pclass_2nd', 'pclass_3rd', 'sex_female']
x = np.asarray(dataset[feature_columns])
y = np.asarray(dataset['survived'])
In [ ]:
# Fit on the full dataset (no train/test split); `lm` is rebound to the value
# returned by fit, which the following cells use as the fitted model.
lm = lm.fit(x,y)
In [ ]:
# Score on the same x, y used for fitting: an in-sample figure, so it is
# likely optimistic compared with performance on unseen data.
lm.score(x,y)
In [ ]:
# Mean of the target; assuming `survived` is coded 0/1 (TODO confirm), this is
# the survival rate and serves as a baseline when reading the model score.
y.mean()
In [ ]:
# Fitted coefficients, in the column order of x:
# pclass_2nd, pclass_3rd, sex_female (each relative to the dropped baseline).
lm.coef_
In [ ]:
# Fitted intercept: the model's offset when every feature in x is 0, i.e. for
# the omitted pclass level with sex_female == 0.
lm.intercept_
In [ ]: