In [1]:
import pandas as pd
%matplotlib inline
import numpy as np
In [2]:
# Read the Titanic passenger table; the path is relative to the working directory.
titanic = pd.read_csv("data/titanic.csv")
In [3]:
# Show the column labels of the loaded frame before picking features.
titanic.columns
Out[3]:
In [4]:
# Encode sex as a 0/1 indicator: 1 where the value is exactly 'female', 0 for
# anything else (including missing values). The vectorized comparison replaces
# a per-row Python lambda and yields the same 0/1 values.
titanic['sex_female'] = titanic['sex'].eq('female').astype(int)
In [5]:
# Assemble the modelling frame: the target column, one indicator column per
# passenger class, and the female indicator.
# dtype=int keeps the class dummies as 0/1 integers; recent pandas versions
# otherwise emit bool columns, which become an object-dtype matrix once they
# are combined with the integer sex_female column.
dataset = titanic[['survived']].join(
    [
        pd.get_dummies(titanic['pclass'], prefix="pclass", dtype=int),
        titanic['sex_female'],
    ]
)
In [6]:
from sklearn.linear_model import LogisticRegression
In [7]:
# Logistic regression with scikit-learn's default hyperparameters (none are overridden here).
lm = LogisticRegression()
In [8]:
# pclass_1st is deliberately left out so that it acts as the reference class
# (this avoids the dummy-variable trap).
x = dataset[['pclass_2nd','pclass_3rd','sex_female']].to_numpy()
y = dataset['survived'].to_numpy()
In [9]:
# Fit on every row of x / y; no train/test split is made in the cells shown here.
lm = lm.fit(x,y)
In [10]:
# Scored on the same x, y that the model was fitted on, so this is an in-sample figure.
lm.score(x,y)
Out[10]:
In [ ]:
# Mean of the target array; if survived is coded 0/1 this is the share of
# survivors, a baseline to compare the score against.
y.mean()
In [11]:
# Fitted coefficients, in the column order used to build x:
# pclass_2nd, pclass_3rd, sex_female.
lm.coef_
Out[11]:
In [12]:
# Fitted intercept: the model's offset when all three features are 0.
lm.intercept_
Out[12]:
In [14]:
# Feature order is [pclass_2nd, pclass_3rd, sex_female]: this row is a female
# passenger in the omitted (reference) class.
# predict expects a 2D (n_samples, n_features) input, so the single sample is
# wrapped in an outer list; a flat list is rejected by current scikit-learn.
lm.predict([[0,0,1]])
Out[14]:
In [16]:
# Feature order is [pclass_2nd, pclass_3rd, sex_female]: this row is a
# non-female passenger in the omitted (reference) class.
# predict expects a 2D (n_samples, n_features) input, so the single sample is
# wrapped in an outer list; a flat list is rejected by current scikit-learn.
lm.predict([[0,0,0]])
Out[16]:
In [17]:
# Feature order is [pclass_2nd, pclass_3rd, sex_female]: this row is a
# non-female passenger with pclass_3rd set.
# predict expects a 2D (n_samples, n_features) input, so the single sample is
# wrapped in an outer list; a flat list is rejected by current scikit-learn.
lm.predict([[0,1,0]])
Out[17]:
In [19]:
# Log-probabilities of each class for a female passenger in the omitted
# (reference) class; feature order is [pclass_2nd, pclass_3rd, sex_female].
# The method expects a 2D (n_samples, n_features) input, so the single sample
# is wrapped in an outer list; a flat list is rejected by current scikit-learn.
lm.predict_log_proba([[0,0,1]])
Out[19]:
In [ ]: