In [1]:
import pandas as pd
titanic=pd.read_csv('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt')
X=titanic[['pclass','age','sex']]
y=titanic['survived']
X['age'].fillna(X['age'].mean(),inplace=True)
from sklearn.cross_validation import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=33)
from sklearn.feature_extraction import DictVectorizer
vec=DictVectorizer(sparse=False)
X_train=vec.fit_transform(X_train.to_dict(orient='record'))
X_test=vec.transform(X_test.to_dict(orient='record'))
from sklearn.tree import DecisionTreeClassifier
dtc=DecisionTreeClassifier()
dtc.fit(X_train,y_train)
dtc_y_pred=dtc.predict(X_test)
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier()
rfc.fit(X_train,y_train)
rfc_y_pred=rfc.predict(X_test)
from sklearn.ensemble import GradientBoostingClassifier
gbc=GradientBoostingClassifier()
gbc.fit(X_train,y_train)
gbc_y_pred=gbc.predict(X_test)
from sklearn.metrics import classification_report
print 'accuracy of decision tree is ',dtc.score(X_test,y_test)
print classification_report(dtc_y_pred,y_test,target_names=['died','suivived'])
print 'accuracy of random forest classifier is ',rfc.score(X_test,y_test)
print classification_report(rfc_y_pred,y_test,target_names=['died','suivived'])
print 'accuracy of gradient boosting decision tree is ',gbc.score(X_test,y_test)
print classification_report(gbc_y_pred,y_test,target_names=['died','suivived'])


accuracy of decision tree is  0.781155015198
             precision    recall  f1-score   support

       died       0.91      0.78      0.84       236
   suivived       0.58      0.80      0.67        93

avg / total       0.81      0.78      0.79       329

accuracy of random forest classifier is  0.784194528875
             precision    recall  f1-score   support

       died       0.91      0.78      0.84       235
   suivived       0.59      0.80      0.68        94

avg / total       0.82      0.78      0.79       329

accuracy of gradient boosting decision tree is  0.790273556231
             precision    recall  f1-score   support

       died       0.92      0.78      0.84       239
   suivived       0.58      0.82      0.68        90

avg / total       0.83      0.79      0.80       329

/Users/ifeng/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py:3295: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
/Users/ifeng/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)