In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier as dt
from sklearn.cross_validation import ShuffleSplit, train_test_split
from sklearn.metrics import make_scorer,accuracy_score
from sklearn.grid_search import GridSearchCV
In [2]:
# Read the passenger table from the working directory.
t_data = pd.read_csv('titanic_data.csv')

# Keep a separate handle on the 'Survived' column before trimming the frame.
l_data = t_data['Survived']

# Discard the columns that the rest of the notebook does not use.
# NOTE(review): 'Survived' is not in this list, so it is still a column of
# t_data afterwards -- confirm it is removed before t_data is used as model input.
t_data = t_data.drop(
    labels=['Name', 'PassengerId', 'Ticket', 'Cabin', 'Embarked'],
    axis=1,
)
def sexter(data):
    """Encode a sex label as an integer flag.

    Returns 1 when ``data`` equals the string 'female' and 0 for every
    other value (any other string, None, NaN, ...).
    """
    if data == 'female':
        return 1
    return 0
# Replace the textual 'Sex' column with the integer flag produced by sexter().
# Bracket assignment is used so the target is unambiguously a column.
t_data['Sex'] = t_data['Sex'].apply(sexter)

# Fill every missing value with 0. Reassigning (instead of inplace=True) keeps
# the step explicit and safe to re-run.
# NOTE(review): 0 is used as the fill value for all columns alike -- confirm
# that it is a sensible placeholder for each of them.
t_data = t_data.fillna(0)
In [3]:
# Preview the first five rows of the cleaned frame.
t_data.head(n=5)
Out[3]:
In [30]:
# Survival breakdown per (Pclass, Sex) group.
#
# For each group this prints how many passengers died / survived, and that
# count as a share of ALL deaths / ALL survivals in t_data (not as a share of
# the group itself). Sex is printed as the integer code stored in t_data['Sex'].

# The overall totals do not depend on the group, so compute them once.
total_dead = int((t_data['Survived'] == 0).sum())
total_alive = int((t_data['Survived'] == 1).sum())

for (pclass, sex), members in t_data.groupby(['Pclass', 'Sex']):
    # Count by comparison instead of value_counts()[label]: a group in which
    # nobody died (or nobody survived) has no such label and would raise KeyError.
    dead = int((members['Survived'] == 0).sum())
    alive = int((members['Survived'] == 1).sum())

    # Guard the divisions so data with no deaths / no survivors reports 0.
    dead_p = float(dead) / total_dead if total_dead else 0.0
    alive_p = float(alive) / total_alive if total_alive else 0.0

    print('Pclass %i, Sex %i, dead = %i, dead_p = %0.2f' % (pclass, sex, dead, dead_p))
    print('Pclass %i, Sex %i, alive = %i, alive_p = %0.2f' % (pclass, sex, alive, alive_p))
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: