In [24]:
import pandas as pd
import numpy as np

import os


## Load the training table from the project's data directory (path is relative
## to the notebook's working directory). Later statements read the
## PassengerId, Name, Cabin, Ticket and Fare columns from this frame.
train = pd.read_csv(filepath_or_buffer='../data/train.csv')

## extract Title of Name
## Assumes names are formatted as "Surname, Title. Given names". The title is
## taken as everything between the first comma and the first period after it,
## so multi-word titles (e.g. "the Countess") are kept whole instead of being
## cut down to a fragment of their first word. Names that do not match the
## pattern get a missing value rather than raising.
train['Title'] = (
    train['Name']
    .str.extract(r',\s*([^.]+)\.', expand=False)
    .str.strip()
)

## extract Surname to detect family
## Surname is the text before the first comma (the whole name if there is none).
train['Surname'] = train['Name'].str.split(',', n=1).str[0]

## extract Cabin class
## First character of the cabin code; None when the cabin is missing, is not a
## string, or is an empty string (indexing [0] on '' would raise IndexError).
train['CabinC'] = train['Cabin'].map(
    lambda cabin: cabin[0] if isinstance(cabin, str) and cabin else None
)

## Family-group detection
## Passengers are considered one family when they share a Surname and also
## share either the same Fare or the same Ticket.

## Group size per (Surname, Fare) pair
fare_ = train.groupby(by=['Surname', 'Fare'], as_index=True)
fare = fare_['PassengerId'].count()

## Group size per (Surname, Ticket) pair
ticket = train.groupby(by=['Surname', 'Ticket'])['PassengerId'].count()

## TODO: attach these group sizes back onto `train`. A plain
## pd.concat([train, ticket], axis=1) would not line up, because `ticket` is
## indexed by (Surname, Ticket) while `train` is not; a merge on the key
## columns or groupby(...).transform('count') is the likely route.
ticket


Out[24]:
Surname           Ticket            
Abbing            C.A. 5547             1
Abbott            C.A. 2673             2
Abelson           P/PP 3381             2
Adahl             C 7076                1
Adams             341826                1
Ahlin             7546                  1
Aks               392091                1
Albimona          2699                  1
Alexander         3474                  1
Alhomaki          SOTON/O2 3101287      1
Ali               SOTON/O.Q. 3101311    1
                  SOTON/O.Q. 3101312    1
Allen             24160                 1
                  373450                1
Allison           113781                3
Allum             2223                  1
Andersen-Jensen   350046                1
Anderson          19952                 1
Andersson         3101281               1
                  347082                7
                  350043                1
Andreasson        347466                1
Andrew            231945                1
Andrews           112050                1
                  13502                 1
Angle             226875                1
Appleton          11769                 1
Arnold-Franchi    349237                2
Artagaveytia      PC 17609              1
Asim              SOTON/O.Q. 3101310    1
                                       ..
White             35281                 2
Wick              36928                 2
Widegren          347064                1
Widener           113503                1
Wiklund           3101267               1
Wilhelms          244270                1
Willey            S.O./P.P. 751         1
Williams          244373                1
                  54636                 1
                  A/5 2466              1
                  PC 17597              1
Williams-Lambert  113510                1
Windelov          SOTON/OQ 3101317      1
Wiseman           A/4. 34244            1
Woolner           19947                 1
Wright            113807                1
Yasbeck           2659                  2
Young             PC 17760              1
Youseff           2628                  1
Yousif            2647                  1
Yousseff          2627                  1
Yrois             248747                1
Zabour            2665                  2
Zimmerman         315082                1
de Messemaeker    345572                1
de Mulder         345774                1
de Pelsmaeker     345778                1
del Carlo         SC/PARIS 2167         1
van Billiard      A/5. 851              1
van Melkebeke     345777                1
Name: PassengerId, dtype: int64

In [ ]: