In [1]:
import pandas as pd
import numpy as np

import os


train = pd.read_csv('../data/train.csv')

## extract Title of Name
train['Title'] = list( map( lambda x: x.split(",")[1].split(' ')[1][:-1], train['Name'] ) )

## extract Surname to detect family
train['Surname'] = list( map(lambda x: x.split(',')[0], train['Name']) )

## extract Cabin class
train['CabinC'] = list( map(lambda x: x[0] if isinstance(x, str) else None, train['Cabin']) )

## detecting family group
## same fare or same Ticket, and same Surname 
fare_ = train.groupby(['Surname', 'Fare'], as_index=True)
fare = fare_.PassengerId.count()
#
ticket = train.groupby(['Surname', 'Ticket']).PassengerId.count()

family = fare
family['family'] = 0
family['family'] = fare
family.head()
# pd.concat([df1, df4], axis=1, join='inner')
#pd.concat( [train, ticket], axis=1)


Out[1]:
Surname  Fare 
Abbing   7.55     1
Abbott   20.25    2
Abelson  24.00    2
Adahl    7.25     1
Adams    8.05     1
Name: PassengerId, dtype: int64

In [ ]: