In [1]:
import pandas as pd
import numpy as np
import os
# Load the training data. The steps below read its Name, Cabin, Fare, Ticket
# and PassengerId columns.
train = pd.read_csv('../data/train.csv')

## extract Title of Name
# The title is the last word found between the comma and the first period of
# the name, so a multi-word prefix such as "the Countess." yields "Countess".
# Picking a fixed word position and trimming one character returned "th" for
# that name and raised IndexError on names without the expected layout; here
# such names simply get a missing value.
train['Title'] = (
    train['Name']
    .str.extract(r',\s*([^.]*)\.', expand=False)
    .str.split()
    .str[-1]
)

## extract Surname to detect family
# Everything before the first comma of the name.
train['Surname'] = train['Name'].str.split(',').str[0]

## extract Cabin class
# First character of the Cabin value; non-string (missing) cabins become None.
train['CabinC'] = [
    cabin[0] if isinstance(cabin, str) else None
    for cabin in train['Cabin']
]

## detecting family group
## same fare or same Ticket, and same Surname
# Number of passengers sharing each (Surname, Fare) pair.
fare_ = train.groupby(['Surname', 'Fare'], as_index=True)
fare = fare_['PassengerId'].count()

# Number of passengers sharing each (Surname, Ticket) pair.
ticket = train.groupby(['Surname', 'Ticket'])['PassengerId'].count()

# Build the family-size table as a new DataFrame with a single 'family' column.
# Aliasing `fare` and assigning to the label 'family' would mutate the `fare`
# Series itself instead of creating a column.
family = fare.to_frame(name='family')

# NOTE: `family` is indexed by (Surname, Fare); join it back onto `train` on
# those two columns if a per-passenger family-size feature is needed.
family.head()
Out[1]:
In [ ]: