This code based on these codes.

https://www.kaggle.com/francksylla/titanic/titanic-machine-learning-from-disaster/code



In [238]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image, display
%matplotlib inline

train_input = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})
test_input = pd.read_csv("../input/test.csv", dtype={"Age": np.float64})

df = pd.concat([train_input, test_input], ignore_index=True)
df.head()









    Out[238]:






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
    
  
  
    
      0
      22.0
      NaN
      S
      7.2500
      Braund, Mr. Owen Harris
      0
      1
      3
      male
      1
      0.0
      A/5 21171
    
    
      1
      38.0
      C85
      C
      71.2833
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      0
      2
      1
      female
      1
      1.0
      PC 17599
    
    
      2
      26.0
      NaN
      S
      7.9250
      Heikkinen, Miss. Laina
      0
      3
      3
      female
      0
      1.0
      STON/O2. 3101282
    
    
      3
      35.0
      C123
      S
      53.1000
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      0
      4
      1
      female
      1
      1.0
      113803
    
    
      4
      35.0
      NaN
      S
      8.0500
      Allen, Mr. William Henry
      0
      5
      3
      male
      0
      0.0
      373450



In [239]:

    
print(df.hist())









    



[[<matplotlib.axes._subplots.AxesSubplot object at 0x1285A310>
  <matplotlib.axes._subplots.AxesSubplot object at 0x24FD6430>
  <matplotlib.axes._subplots.AxesSubplot object at 0x16B13B30>]
 [<matplotlib.axes._subplots.AxesSubplot object at 0x159D3210>
  <matplotlib.axes._subplots.AxesSubplot object at 0x18427990>
  <matplotlib.axes._subplots.AxesSubplot object at 0x1FFF28D0>]
 [<matplotlib.axes._subplots.AxesSubplot object at 0x26AC11F0>
  <matplotlib.axes._subplots.AxesSubplot object at 0x1827F8F0>
  <matplotlib.axes._subplots.AxesSubplot object at 0x26F67050>]]



In [240]:

    
categorical_columns = ['Sex', 'Embarked']
numerical_columns = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
text_columns = ['Name', 'Ticket']

def category_to_numeric(df, column_name):
    for category in df[column_name].unique():
        category_column = column_name + '_' + str(category)
        if category_column in df.columns:
            df = df.drop(category_column, axis=1)
        if category_column not in numerical_columns:
            numerical_columns.append(category_column)
    df= pd.concat([df,pd.get_dummies(df[column_name], prefix=column_name)],axis=1)
    return df



In [241]:

    
print(df.hist())









    



[[<matplotlib.axes._subplots.AxesSubplot object at 0x12ADF410>
  <matplotlib.axes._subplots.AxesSubplot object at 0x2709CBB0>
  <matplotlib.axes._subplots.AxesSubplot object at 0x264F6350>]
 [<matplotlib.axes._subplots.AxesSubplot object at 0x2652A630>
  <matplotlib.axes._subplots.AxesSubplot object at 0x28163E10>
  <matplotlib.axes._subplots.AxesSubplot object at 0x26537A30>]
 [<matplotlib.axes._subplots.AxesSubplot object at 0x28F43DB0>
  <matplotlib.axes._subplots.AxesSubplot object at 0x28F8E4D0>
  <matplotlib.axes._subplots.AxesSubplot object at 0x28D537D0>]]



In [242]:

    
# Sex
sns.set(style="whitegrid")

g = sns.factorplot(x="Sex", y="Survived", data=df, size=4, palette="muted")
g.despine(left=True)
g.set_ylabels("survival probability")









    Out[242]:





<seaborn.axisgrid.FacetGrid at 0x2001f090>



In [243]:

    
def get_sex_adult(row):
    age, sex = row
    if age < 18:
        return 'child'
    elif sex == 'female':
        return 'female_adult'
    else:
        return 'male_adult'

df['SexAdult'] = df[['Age', 'Sex']].apply(get_sex_adult, axis=1)
g = sns.factorplot(x="SexAdult", y="Survived", data=df, size=4, palette="muted")

if 'SexAdult' not in categorical_columns:
    categorical_columns.append('SexAdult')



In [244]:

    
# Embarked
df['Embarked'] = df['Embarked'].fillna('unknown')
if 'Embarked' not in categorical_columns:
    categorical_columns.append('Embarked')

df["Embarked_Category"] = pd.Categorical.from_array(df.Embarked).codes
if 'Embarked_Category' not in categorical_columns:
    categorical_columns.append('Embarked_Category')

g = sns.factorplot(x="Embarked_Category", y="Survived", data=df, size=4, palette="muted")
g.despine(left=True)
g.set_ylabels("survival probability")









    Out[244]:





<seaborn.axisgrid.FacetGrid at 0x25c49e50>



In [245]:

    
df_ticket = pd.DataFrame(df['Ticket'].value_counts())
df_ticket.rename(columns={'Ticket':'TicketMembers'}, inplace=True)

df_ticket['Ticket_perishing_women'] = df.Ticket[(df.SexAdult == 'female_adult')
                                               & (df.Survived == 0.0)
                                               & ((df.Parch > 0) | (df.SibSp > 0))].value_counts()
df_ticket['Ticket_perishing_women'] = df_ticket['Ticket_perishing_women'].fillna(0)
df_ticket['TicketGroup_include_perishing_women'] = df_ticket['Ticket_perishing_women'] > 0
df_ticket['TicketGroup_include_perishing_women'] = df_ticket['TicketGroup_include_perishing_women'].astype(int)

df_ticket['Ticket_surviving_men'] = df.Ticket[(df.SexAdult == 'male_adult')
                                              & (df.Survived == 1.0)
                                              & ((df.Parch > 0) | (df.SibSp > 0))].value_counts()
df_ticket['Ticket_surviving_men'] = df_ticket['Ticket_surviving_men'].fillna(0)
df_ticket['TicketGroup_include_surviving_men'] = df_ticket['Ticket_surviving_men'] > 0
df_ticket['TicketGroup_include_surviving_men'] = df_ticket['TicketGroup_include_surviving_men'].astype(int)

df_ticket["TicketId"] = pd.Categorical.from_array(df_ticket.index).codes
df_ticket.loc[df_ticket[df_ticket['TicketMembers'] < 3].index, "TicketId"] = -1
df_ticket["TicketMembers_Simple"] = pd.cut(df_ticket['TicketMembers'], bins=[0,1,4,20], labels=[0,1,2])
if 'TicketGroup_include_perishing_women' not in df.columns:
    df = pd.merge(df, df_ticket, left_on="Ticket", right_index=True, how='left', sort=False)

if 'Ticket_perishing_women' not in numerical_columns:
    numerical_columns.append('Ticket_perishing_women')
if 'TicketGroup_include_perishing_women' not in numerical_columns:
    numerical_columns.append('TicketGroup_include_perishing_women')
if 'Ticket_surviving_men' not in numerical_columns:
    numerical_columns.append('Ticket_surviving_men')
if 'TicketGroup_include_surviving_men' not in numerical_columns:
    numerical_columns.append('TicketGroup_include_surviving_men')
if 'TicketId' not in numerical_columns:
    numerical_columns.append('TicketId')
if 'TicketMembers' not in numerical_columns:
    numerical_columns.append('TicketMembers')
    
g = sns.factorplot(x="TicketGroup_include_perishing_women", y="Survived", data=df, size=4, palette="muted")
g = sns.factorplot(x="Ticket_surviving_men", y="Survived", data=df, size=4, palette="muted")



In [246]:

    
# surname
df['surname'] = df['Name'].apply(lambda x: x.split(',')[0].lower())
df_surname = pd.DataFrame(df['surname'].value_counts())
df_surname.rename(columns={'surname':'SurnameMembers'}, inplace=True)

df_surname['Surname_perishing_women'] = df.surname[(df.SexAdult == 'female_adult')
                                               & (df.Survived == 0.0)
                                               & ((df.Parch > 0) | (df.SibSp > 0))].value_counts()
df_surname['Surname_perishing_women'] = df_surname['Surname_perishing_women'].fillna(0)
df_surname['SurnameGroup_include_perishing_women'] = df_surname['Surname_perishing_women'] > 0
df_surname['SurnameGroup_include_perishing_women'] = df_surname['SurnameGroup_include_perishing_women'].astype(int)

df_surname['Surname_surviving_men'] = df.surname[(df.SexAdult == 'male_adult')
                                              & (df.Survived == 1.0)
                                              & ((df.Parch > 0) | (df.SibSp > 0))].value_counts()
df_surname['Surname_surviving_men'] = df_surname['Surname_surviving_men'].fillna(0)
df_surname['SurnameGroup_include_surviving_men'] = df_surname['Surname_surviving_men'] > 0
df_surname['SurnameGroup_include_surviving_men'] = df_surname['SurnameGroup_include_surviving_men'].astype(int)

df_surname["SurnameId"] = pd.Categorical.from_array(df_surname.index).codes
df_surname.loc[df_surname[df_surname['SurnameMembers'] < 3].index, "SurnameId"] = -1
df_surname["SurnameMembers_Simple"] = pd.cut(df_surname['SurnameMembers'], bins=[0,1,4,20], labels=[0,1,2])
if 'SurnameGroup_include_perishing_women' not in df.columns:
    df = pd.merge(df, df_surname, left_on="surname", right_index=True, how='left', sort=False)


if 'Surname_perishing_women' not in numerical_columns:
    numerical_columns.append('Surname_perishing_women')
if 'SurnameGroup_include_perishing_women' not in numerical_columns:
    numerical_columns.append('SurnameGroup_include_perishing_women')
if 'Surname_surviving_men' not in numerical_columns:
    numerical_columns.append('Surname_surviving_men')
if 'SurnameGroup_include_surviving_men' not in numerical_columns:
    numerical_columns.append('SurnameGroup_include_surviving_men')
if 'SurnameId' not in numerical_columns:
    numerical_columns.append('SurnameId')
if 'SurnameMembers' not in numerical_columns:
    numerical_columns.append('SurnameMembers')
    
g = sns.factorplot(x="SurnameGroup_include_perishing_women", y="Survived", data=df, size=4, palette="muted")
g = sns.factorplot(x="SurnameGroup_include_surviving_men", y="Survived", data=df, size=4, palette="muted")



In [247]:

    
# title
import re
df['Name_title'] = df['Name'].apply(lambda x: re.search(' ([A-Za-z]+)\.', x).group(1))
df.loc[df[df['Name_title'] == 'Ms'].index, 'Name_title'] = 'Miss'
print(df['Name_title'].unique())
if 'Name_title' not in categorical_columns:
    categorical_columns.append('Name_title')
g = sns.factorplot(y="Name_title", x="Survived", data=df, size=4, palette="muted")









    



['Mr' 'Mrs' 'Miss' 'Master' 'Don' 'Rev' 'Dr' 'Mme' 'Major' 'Lady' 'Sir'
 'Mlle' 'Col' 'Capt' 'Countess' 'Jonkheer' 'Dona']



In [248]:

    
title_mapping = {
    "Mr": 1, 
    "Miss": 2, 
    "Ms": 2, 
    "Mlle": 2, 
    "Mrs": 3, 
    "Mme": 3,
    "Master": 4, 
    "Dr": 5, 
    "Rev": 6, 
    "Major": 7, 
    "Capt": 7,
    "Col": 7, 
    "Don": 9,
    "Dona": 9, 
    "Sir": 9, 
    "Lady": 10, 
    "Countess": 10, 
    "Jonkheer": 10, 
}
df["Name_titleCategory"] = df.loc[:,'Name_title'].map(title_mapping)

if 'Name_titleCategory' not in categorical_columns:
    categorical_columns.append('Name_titleCategory')
g = sns.factorplot(x="Name_titleCategory", y="Survived", data=df, size=4, palette="muted")



In [249]:

    
# FamilySize
df['FamilySize'] = df['SibSp'] + df['Parch']
if 'FamilySize' not in numerical_columns:
    numerical_columns.append('FamilySize')
g = sns.factorplot(x="SibSp", y="Survived", data=df, size=4, palette="muted")
g = sns.factorplot(x="Parch", y="Survived", data=df, size=4, palette="muted")
g = sns.factorplot(x="FamilySize", y="Survived", data=df, size=4, palette="muted")



In [250]:

    
# Name Length?
df['NameLength'] = df["Name"].apply(lambda x: len(x))
if 'NameLength' not in numerical_columns:
    numerical_columns.append('NameLength')
g = sns.factorplot(y="NameLength", x="Survived", data=df, size=4, palette="muted")
g.despine(left=True)
g.set_ylabels("survival probability")









    Out[250]:





<seaborn.axisgrid.FacetGrid at 0x2650fb90>



In [251]:

    
# Pclass
g = sns.factorplot(x="Pclass", y="Survived", data=df, size=4, palette="muted")



In [252]:

    
# cabin
# https://www.kaggle.com/c/titanic/prospector#1326
def get_cabin_location(cabin):
    if cabin == ' ':
        return 'no_cabin'
    # The cabin info consists of a letter (corresponding to a deck) 
    # and a cabin number, which is odd for cabins on the starboard side and even for the port.
    cabin_search_result = re.search('\d+', cabin)
    if cabin_search_result:
        type_code = np.int64(cabin_search_result.group(0))
        if type_code % 2 == 0:
            return 'port'
        else:
            return 'starboard'
    return 'unknown'

def get_cabin_deck(cabin):
    if cabin == ' ':
        return 'no_cabin'
    # The cabin info consists of a letter (corresponding to a deck) 
    # and a cabin number, which is odd for cabins on the starboard side and even for the port.
    cabin_search_result = re.search('[A-z]+', cabin)
    if cabin_search_result:
        return cabin_search_result.group(0)
    return 'unknown'

def get_cabin_count(cabin):
    if cabin == ' ':
        return 0
    cabin_search_result = re.findall('([A-z]\d+)', cabin)
    if cabin_search_result:
        return len(cabin_search_result)
    return 0

df['CabinLocation'] = df['Cabin'].fillna(' ').apply(get_cabin_location)
df['CabinDeck'] = df['Cabin'].fillna(' ').apply(get_cabin_deck)
df['CabinCount'] = df['Cabin'].fillna(' ').apply(get_cabin_count)

if 'CabinLocation' not in categorical_columns:
    categorical_columns.append('CabinLocation')
if 'CabinDeck' not in categorical_columns:
    categorical_columns.append('CabinDeck')
if 'CabinCount' not in numerical_columns:
    numerical_columns.append('CabinCount')

g = sns.factorplot(x="Survived", y="CabinLocation", data=df, size=4, palette="muted")
g = sns.factorplot(x="Survived", y="CabinDeck", data=df, size=4, palette="muted")
g = sns.factorplot(x="CabinCount", y="Survived", data=df, size=4, palette="muted")



In [253]:

    
df['CabinCategory'] = pd.Categorical.from_array(df.Cabin.fillna('0').apply(lambda x:x[0])).codes
g = sns.factorplot(y="Survived", x="CabinCategory", data=df, size=4, palette="muted")
if 'CabinCategory' not in categorical_columns:
    categorical_columns.append('CabinCategory')



In [254]:

    
# Fare
# df['Fare'] = df['Fare'].fillna(df['Fare'].mean())
df["Fare"] = df["Fare"].fillna(8.05)
print(df['Fare'].describe())
print(df['Fare'].hist())
g = sns.factorplot(x="Survived", y="Fare", data=df, size=4, palette="muted")









    



count    1309.000000
mean       33.276193
std        51.743584
min         0.000000
25%         7.895800
50%        14.454200
75%        31.275000
max       512.329200
Name: Fare, dtype: float64
Axes(0.125,0.125;0.775x0.775)



In [255]:

    
df['TicketMembers'] = df['TicketMembers'].fillna(0)
print(df.head()[['Pclass','Fare', 'TicketMembers']])
df['Fare_per_ticket_member'] = df['Fare'] / (df['TicketMembers'])
print(df['Fare_per_ticket_member'].hist())
g = sns.factorplot(x="Survived", y="Fare_per_ticket_member", data=df, size=4, palette="muted")









    



   Pclass     Fare  TicketMembers
0       3   7.2500              1
1       1  71.2833              2
2       3   7.9250              1
3       1  53.1000              2
4       3   8.0500              1
Axes(0.125,0.125;0.775x0.775)



In [256]:

    
from math import log

class_fare = pd.DataFrame(columns=['count','mean','std','min','25%','50%','75%','max'])
class_fare.loc[1,:] = df[df['Pclass'] == 1]['Fare'].describe()
class_fare.loc[2,:] = df[df['Pclass'] == 2]['Fare'].describe()
class_fare.loc[3,:] = df[df['Pclass'] == 3]['Fare'].describe()

very_small_val = 0.01
df['Fare_standard_score_with_Pclass'] = df.apply(lambda row: (log(row['Fare'] + very_small_val) - log(class_fare.loc[row['Pclass'], 'mean'] + very_small_val)) / log(class_fare.loc[row['Pclass'], 'std'] + very_small_val), axis=1)
if 'Fare_standard_score_with_Pclass' not in numerical_columns:
    numerical_columns.append('Fare_standard_score_with_Pclass')



In [257]:

    
df[(df['Fare_standard_score_with_Pclass'] >= -0.5) & (df['Fare_standard_score_with_Pclass'] <= 0.5)]['Fare_standard_score_with_Pclass'].hist()
g = sns.factorplot(x="Survived", y="Fare_standard_score_with_Pclass", data=df, size=4, palette="muted")



In [258]:

    
from math import log

class_fare = pd.DataFrame(columns=['count','mean','std','min','25%','50%','75%','max'])
class_fare.loc[1,:] = df[df['Pclass'] == 1]['Fare_per_ticket_member'].describe()
class_fare.loc[2,:] = df[df['Pclass'] == 2]['Fare_per_ticket_member'].describe()
class_fare.loc[3,:] = df[df['Pclass'] == 3]['Fare_per_ticket_member'].describe()

very_small_val = 0.01
df['Fare_per_ticket_member_standard_score_with_Pclass'] = df.apply(lambda row: (log(row['Fare_per_ticket_member'] + very_small_val) - log(class_fare.loc[row['Pclass'], 'mean'] + very_small_val)) / log(class_fare.loc[row['Pclass'], 'std'] + very_small_val), axis=1)
if 'Fare_per_ticket_member_standard_score_with_Pclass' not in numerical_columns:
    numerical_columns.append('Fare_per_ticket_member_standard_score_with_Pclass')



In [259]:

    
df[(df['Fare_per_ticket_member_standard_score_with_Pclass'] >= -0.5) & (df['Fare_per_ticket_member_standard_score_with_Pclass'] <= 0.5)]['Fare_per_ticket_member_standard_score_with_Pclass'].hist()
g = sns.factorplot(x="Survived", y="Fare_per_ticket_member_standard_score_with_Pclass", data=df, size=4, palette="muted")



In [260]:

    
# https://www.kaggle.com/c/titanic/forums/t/11127/do-ticket-numbers-mean-anything
#print(df["Ticket"])
#print(df["Ticket"].value_counts())

def get_ticket_prefix(cabin):
    # The cabin info consists of a letter (corresponding to a deck) 
    # and a cabin number, which is odd for cabins on the starboard side and even for the port.
    cabin_search_result = re.search('[^\d]+', cabin)
    if cabin_search_result:
        return cabin_search_result.group(0).replace('/', '').replace('.', '').strip()
    return 'unknown'

df['TicketPrefix'] = df['Ticket'].apply(get_ticket_prefix)
g = sns.factorplot(y="TicketPrefix", x="Survived", data=df, size=8, palette="muted")

if 'TicketPrefix' not in categorical_columns:
    categorical_columns.append('TicketPrefix')



In [261]:

    
for col in categorical_columns:
    df = category_to_numeric(df, col)



In [262]:

    
# age prediction
from sklearn.ensemble import ExtraTreesRegressor

age_prediction_features = ['Fare', 'Fare_standard_score_with_Pclass',
                           #'Fare_per_ticket_member', 'Fare_per_ticket_member_standard_score_with_Pclass',
                           'Parch', 'Pclass', 'SibSp', 'Sex_female', 'Sex_male', 'FamilySize',
                           'NameLength', 'TicketMembers', 'TicketId', 
                           'Embarked_S', 'Embarked_C', 'Embarked_Q', 'Embarked_unknown', 
                           'Name_title_Mr', 'Name_title_Mrs', 'Name_title_Miss', 'Name_title_Master', 
                           'Name_title_Don', 'Name_title_Rev', 'Name_title_Dr', 'Name_title_Mme', 
                           'Name_title_Major', 'Name_title_Lady', 'Name_title_Sir', 'Name_title_Mlle', 'Name_title_Col', 
                           'Name_title_Capt', 'Name_title_Countess', 'Name_title_Jonkheer', 
                           'CabinLocation_no_cabin', 'CabinLocation_starboard', 'CabinLocation_port', 'CabinDeck_no_cabin', 
                           'CabinDeck_C', 'CabinDeck_E', 'CabinDeck_G', 'CabinDeck_D', 'CabinDeck_A', 'CabinDeck_B', 'CabinDeck_F', 'CabinDeck_T'
                          ]
age_prediction_tree_regressor = ExtraTreesRegressor(n_estimators=200)
age_X_train = df[age_prediction_features][df['Age'].notnull()]
age_Y_train = df['Age'][df['Age'].notnull()]
age_prediction_tree_regressor.fit(age_X_train, np.ravel(age_Y_train))

# predict only isnull values
df['Age_pred'] = df['Age']
df.loc[df[df['Age'].isnull()].index, 'Age_pred'] = age_prediction_tree_regressor.predict(df[age_prediction_features][df['Age'].isnull()])

if 'Age_pred' not in numerical_columns:
    numerical_columns.append('Age_pred')

# add ageGroup
df["AgeGroup"] = pd.cut(df['Age'], bins=[-2000,0,11,15,18,30,49,59,200], labels=[-1, 11,15,18,30,49,59,200])
df["AgeGroup_pred"] = pd.cut(df['Age_pred'], bins=[-2000,11,15,18,30,49,59,200], labels=[11,15,18,30,49,59,200])

if 'AgeGroup' not in numerical_columns:
    numerical_columns.append('AgeGroup')
if 'AgeGroup_pred' not in numerical_columns:
    numerical_columns.append('AgeGroup_pred')
    
g = sns.factorplot(y="Survived", x="AgeGroup", data=df, size=4, palette="muted")
g = sns.factorplot(y="Survived", x="AgeGroup_pred", data=df, size=4, palette="muted")



In [263]:

    
# Frugal_First_Class_Single_Man
# midle age first class single man with large discounted and unknown prefixed ticket and without cabin.
print("died", df[(df['Survived'] == 0) & (df['Sex'] == 'male') 
         & (df['Pclass'] == 1) 
         & (df['Age_pred'] <= 45) 
         & (df['Fare'] > 0)
         & (df['Fare_standard_score_with_Pclass'] < -0.25)
         & (df['TicketPrefix_unknown'] == 1)
         & (df['TicketMembers_Simple'] == 0)
         & (df['CabinCount'] == 0)
        ])
print("survived", df[(df['Survived'] == 1) & (df['Sex'] == 'male') 
         & (df['Pclass'] == 1) 
         & (df['Age_pred'] <= 45) 
         & (df['Fare'] > 0)
         & (df['Fare_standard_score_with_Pclass'] < -0.25)
         & (df['TicketPrefix_unknown'] == 1)
         & (df['TicketMembers_Simple'] == 0)
         & (df['CabinCount'] == 0)
        ])









    



('died', Empty DataFrame
Columns: [Age, Cabin, Embarked, Fare, Name, Parch, PassengerId, Pclass, Sex, SibSp, Survived, Ticket, SexAdult, Embarked_Category, TicketMembers, Ticket_perishing_women, TicketGroup_include_perishing_women, Ticket_surviving_men, TicketGroup_include_surviving_men, TicketId, TicketMembers_Simple, surname, SurnameMembers, Surname_perishing_women, SurnameGroup_include_perishing_women, Surname_surviving_men, SurnameGroup_include_surviving_men, SurnameId, SurnameMembers_Simple, Name_title, Name_titleCategory, FamilySize, NameLength, CabinLocation, CabinDeck, CabinCount, CabinCategory, Fare_per_ticket_member, Fare_standard_score_with_Pclass, Fare_per_ticket_member_standard_score_with_Pclass, TicketPrefix, Sex_female, Sex_male, Embarked_C, Embarked_Q, Embarked_S, Embarked_unknown, SexAdult_child, SexAdult_female_adult, SexAdult_male_adult, Embarked_Category_0, Embarked_Category_1, Embarked_Category_2, Embarked_Category_3, Name_title_Capt, Name_title_Col, Name_title_Countess, Name_title_Don, Name_title_Dona, Name_title_Dr, Name_title_Jonkheer, Name_title_Lady, Name_title_Major, Name_title_Master, Name_title_Miss, Name_title_Mlle, Name_title_Mme, Name_title_Mr, Name_title_Mrs, Name_title_Rev, Name_title_Sir, Name_titleCategory_1, Name_titleCategory_2, Name_titleCategory_3, Name_titleCategory_4, Name_titleCategory_5, Name_titleCategory_6, Name_titleCategory_7, Name_titleCategory_9, Name_titleCategory_10, CabinLocation_no_cabin, CabinLocation_port, CabinLocation_starboard, CabinLocation_unknown, CabinDeck_A, CabinDeck_B, CabinDeck_C, CabinDeck_D, CabinDeck_E, CabinDeck_F, CabinDeck_G, CabinDeck_T, CabinDeck_no_cabin, CabinCategory_0, CabinCategory_1, CabinCategory_2, CabinCategory_3, CabinCategory_4, CabinCategory_5, CabinCategory_6, ...]
Index: []

[0 rows x 138 columns])
('survived',       Age Cabin Embarked   Fare  \
187  45.0   NaN        S  26.55   
447  34.0   NaN        S  26.55   
507   NaN   NaN        S  26.55   
604  35.0   NaN        C  26.55   

                                              Name  Parch  PassengerId  \
187  Romaine, Mr. Charles Hallace ("Mr C Rolmane")      0          188   
447                    Seward, Mr. Frederic Kimber      0          448   
507  Bradley, Mr. George ("George Arthur Brayton")      0          508   
604                Homer, Mr. Harry ("Mr E Haven")      0          605   

     Pclass   Sex  SibSp  Survived  Ticket    SexAdult  Embarked_Category  \
187       1  male      0       1.0  111428  male_adult                  2   
447       1  male      0       1.0  113794  male_adult                  2   
507       1  male      0       1.0  111427  male_adult                  2   
604       1  male      0       1.0  111426  male_adult                  0   

     TicketMembers  Ticket_perishing_women  \
187              1                     0.0   
447              1                     0.0   
507              1                     0.0   
604              1                     0.0   

     TicketGroup_include_perishing_women  Ticket_surviving_men  \
187                                    0                   0.0   
447                                    0                   0.0   
507                                    0                   0.0   
604                                    0                   0.0   

     TicketGroup_include_surviving_men  TicketId  TicketMembers_Simple  \
187                                  0        -1                     0   
447                                  0        -1                     0   
507                                  0        -1                     0   
604                                  0        -1                     0   

     surname  SurnameMembers  Surname_perishing_women  \
187  romaine               1                      0.0   
447   seward               1                      0.0   
507  bradley               2                      0.0   
604    homer               1                      0.0   

     SurnameGroup_include_perishing_women  Surname_surviving_men  \
187                                     0                    0.0   
447                                     0                    0.0   
507                                     0                    0.0   
604                                     0                    0.0   

     SurnameGroup_include_surviving_men  SurnameId  SurnameMembers_Simple  \
187                                   0         -1                      0   
447                                   0         -1                      0   
507                                   0         -1                      1   
604                                   0         -1                      0   

    Name_title  Name_titleCategory  FamilySize  NameLength CabinLocation  \
187         Mr                   1           0          45      no_cabin   
447         Mr                   1           0          27      no_cabin   
507         Mr                   1           0          45      no_cabin   
604         Mr                   1           0          31      no_cabin   

    CabinDeck  CabinCount  CabinCategory  Fare_per_ticket_member  \
187  no_cabin           0              0                   26.55   
447  no_cabin           0              0                   26.55   
507  no_cabin           0              0                   26.55   
604  no_cabin           0              0                   26.55   

     Fare_standard_score_with_Pclass  \
187                        -0.271769   
447                        -0.271769   
507                        -0.271769   
604                        -0.271769   

     Fare_per_ticket_member_standard_score_with_Pclass TicketPrefix  \
187                                          -0.089502      unknown   
447                                          -0.089502      unknown   
507                                          -0.089502      unknown   
604                                          -0.089502      unknown   

     Sex_female  Sex_male  Embarked_C  Embarked_Q  Embarked_S  \
187         0.0       1.0         0.0         0.0         1.0   
447         0.0       1.0         0.0         0.0         1.0   
507         0.0       1.0         0.0         0.0         1.0   
604         0.0       1.0         1.0         0.0         0.0   

     Embarked_unknown  SexAdult_child  SexAdult_female_adult  \
187               0.0             0.0                    0.0   
447               0.0             0.0                    0.0   
507               0.0             0.0                    0.0   
604               0.0             0.0                    0.0   

     SexAdult_male_adult      ...        CabinDeck_E  CabinDeck_F  \
187                  1.0      ...                0.0          0.0   
447                  1.0      ...                0.0          0.0   
507                  1.0      ...                0.0          0.0   
604                  1.0      ...                0.0          0.0   

     CabinDeck_G  CabinDeck_T  CabinDeck_no_cabin  CabinCategory_0  \
187          0.0          0.0                 1.0              1.0   
447          0.0          0.0                 1.0              1.0   
507          0.0          0.0                 1.0              1.0   
604          0.0          0.0                 1.0              1.0   

     CabinCategory_1  CabinCategory_2  CabinCategory_3  CabinCategory_4  \
187              0.0              0.0              0.0              0.0   
447              0.0              0.0              0.0              0.0   
507              0.0              0.0              0.0              0.0   
604              0.0              0.0              0.0              0.0   

     CabinCategory_5  CabinCategory_6  CabinCategory_7  CabinCategory_8  \
187              0.0              0.0              0.0              0.0   
447              0.0              0.0              0.0              0.0   
507              0.0              0.0              0.0              0.0   
604              0.0              0.0              0.0              0.0   

     TicketPrefix_A  TicketPrefix_AQ  TicketPrefix_AS  TicketPrefix_C  \
187             0.0              0.0              0.0             0.0   
447             0.0              0.0              0.0             0.0   
507             0.0              0.0              0.0             0.0   
604             0.0              0.0              0.0             0.0   

     TicketPrefix_CA  TicketPrefix_CASOTON  TicketPrefix_FC  TicketPrefix_FCC  \
187              0.0                   0.0              0.0               0.0   
447              0.0                   0.0              0.0               0.0   
507              0.0                   0.0              0.0               0.0   
604              0.0                   0.0              0.0               0.0   

     TicketPrefix_Fa  TicketPrefix_LINE  TicketPrefix_LP  TicketPrefix_PC  \
187              0.0                0.0              0.0              0.0   
447              0.0                0.0              0.0              0.0   
507              0.0                0.0              0.0              0.0   
604              0.0                0.0              0.0              0.0   

     TicketPrefix_PP  TicketPrefix_PPP  TicketPrefix_SC  TicketPrefix_SCA  \
187              0.0               0.0              0.0               0.0   
447              0.0               0.0              0.0               0.0   
507              0.0               0.0              0.0               0.0   
604              0.0               0.0              0.0               0.0   

     TicketPrefix_SCAH  TicketPrefix_SCAH Basle  TicketPrefix_SCOW  \
187                0.0                      0.0                0.0   
447                0.0                      0.0                0.0   
507                0.0                      0.0                0.0   
604                0.0                      0.0                0.0   

     TicketPrefix_SCPARIS  TicketPrefix_SCParis  TicketPrefix_SOC  \
187                   0.0                   0.0               0.0   
447                   0.0                   0.0               0.0   
507                   0.0                   0.0               0.0   
604                   0.0                   0.0               0.0   

     TicketPrefix_SOP  TicketPrefix_SOPP  TicketPrefix_SOTONO  \
187               0.0                0.0                  0.0   
447               0.0                0.0                  0.0   
507               0.0                0.0                  0.0   
604               0.0                0.0                  0.0   

     TicketPrefix_SOTONOQ  TicketPrefix_SP  TicketPrefix_STONO  \
187                   0.0              0.0                 0.0   
447                   0.0              0.0                 0.0   
507                   0.0              0.0                 0.0   
604                   0.0              0.0                 0.0   

     TicketPrefix_STONOQ  TicketPrefix_SWPP  TicketPrefix_WC  \
187                  0.0                0.0              0.0   
447                  0.0                0.0              0.0   
507                  0.0                0.0              0.0   
604                  0.0                0.0              0.0   

     TicketPrefix_WEP  TicketPrefix_unknown  Age_pred  AgeGroup  AgeGroup_pred  
187               0.0                   1.0      45.0      49.0             49  
447               0.0                   1.0      34.0      49.0             49  
507               0.0                   1.0      45.0       NaN             49  
604               0.0                   1.0      35.0      49.0             49  

[4 rows x 138 columns])



In [264]:

    
df['Frugal_First_Class_Single_Man'] = 0

df.loc[df[(df['Sex'] == 'male') 
         & (df['CabinCount'] > 0)
         & (df['Embarked'] == 'C')
         & (df['SurnameMembers'] == 1)
         & (df['TicketPrefix_unknown'] == 1.0)
         & (df['Fare_standard_score_with_Pclass'] < -0.23)
         & (df['Pclass'] == 1)]['Frugal_First_Class_Single_Man'].index, 'Frugal_First_Class_Single_Man'] = 1
display(df[(df['Frugal_First_Class_Single_Man'] == 1)])
if 'Frugal_First_Class_Single_Man' not in numerical_columns:
    numerical_columns.append('Frugal_First_Class_Single_Man')









    






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      SexAdult_male_adult
      ...
      CabinDeck_F
      CabinDeck_G
      CabinDeck_T
      CabinDeck_no_cabin
      CabinCategory_0
      CabinCategory_1
      CabinCategory_2
      CabinCategory_3
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
    
  
  
    
      209
      40.0
      A31
      C
      31.0000
      Blank, Mr. Henry
      0
      210
      1
      male
      0
      1.0
      112277
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      blank
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      16
      starboard
      A
      1
      1
      31.0000
      -0.236466
      -0.032824
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      40.00
      49.0
      49
      1
    
    
      452
      30.0
      C111
      C
      27.7500
      Foreman, Mr. Benjamin Laventall
      0
      453
      1
      male
      0
      0.0
      113051
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      foreman
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      31
      starboard
      C
      1
      3
      27.7500
      -0.261698
      -0.073333
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      30.00
      30.0
      30
      1
    
    
      487
      58.0
      B37
      C
      29.7000
      Kent, Mr. Edward Austin
      0
      488
      1
      male
      0
      0.0
      11771
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      kent
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      23
      starboard
      B
      1
      2
      29.7000
      -0.246226
      -0.048494
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      58.00
      59.0
      59
      1
    
    
      632
      32.0
      B50
      C
      30.5000
      Stahelin-Maeglin, Dr. Max
      0
      633
      1
      male
      0
      1.0
      13214
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      stahelin-maeglin
      1
      0.0
      0
      0.0
      0
      -1
      0
      Dr
      5
      0
      25
      port
      B
      1
      2
      30.5000
      -0.240170
      -0.038772
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      32.00
      49.0
      49
      1
    
    
      839
      NaN
      C47
      C
      29.7000
      Marechal, Mr. Pierre
      0
      840
      1
      male
      0
      1.0
      11774
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      marechal
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      20
      starboard
      C
      1
      3
      29.7000
      -0.246226
      -0.048494
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      42.58
      NaN
      49
      1
    
    
      889
      26.0
      C148
      C
      30.0000
      Behr, Mr. Karl Howell
      0
      890
      1
      male
      0
      1.0
      111369
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      behr
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      21
      port
      C
      1
      3
      30.0000
      -0.243936
      -0.044818
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      26.00
      30.0
      30
      1
    
    
      959
      31.0
      C53
      C
      28.5375
      Tucker, Mr. Gilbert Milligan Jr
      0
      960
      1
      male
      0
      NaN
      2543
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      tucker
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      31
      starboard
      C
      1
      3
      28.5375
      -0.255323
      -0.063098
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      31.00
      49.0
      49
      1
    
    
      1022
      53.0
      C51
      C
      28.5000
      Gracie, Col. Archibald IV
      0
      1023
      1
      male
      0
      NaN
      113780
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      gracie
      1
      0.0
      0
      0.0
      0
      -1
      0
      Col
      7
      0
      25
      starboard
      C
      1
      3
      28.5000
      -0.255622
      -0.063579
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      53.00
      59.0
      59
      1
    
  

8 rows × 139 columns



In [265]:

    
display(df[(df['Sex'] == 'female') & 
   (df['Fare_standard_score_with_Pclass'] <= -0.18) & 
   (df['Age_pred'] > 30) & 
   (df['Pclass'] == 3) & 
   (df['Name_title_Miss'] == 1.0)
  ])

# poor old miss
df['Poor_Old_Miss_Third_Class'] = 0
df.loc[df[(df['Sex'] == 'female') & 
   (df['Fare_standard_score_with_Pclass'] <= -0.18) & 
   (df['Age'] > 30) & 
   (df['Pclass'] == 3) & 
   (df['Name_title_Miss'] == 1.0)].index, 'Poor_Old_Miss_Third_Class'] = 1
       
if 'Poor_Old_Miss_Third_Class' not in numerical_columns:
    numerical_columns.append('Poor_Old_Miss_Third_Class')









    






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      SexAdult_male_adult
      ...
      CabinDeck_F
      CabinDeck_G
      CabinDeck_T
      CabinDeck_no_cabin
      CabinCategory_0
      CabinCategory_1
      CabinCategory_2
      CabinCategory_3
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
    
  
  
    
      32
      NaN
      NaN
      Q
      7.7500
      Glynn, Miss. Mary Agatha
      0
      33
      3
      female
      0
      1.0
      335677
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      glynn
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      24
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      35.000
      NaN
      49
      0
    
    
      264
      NaN
      NaN
      Q
      7.7500
      Henry, Miss. Delia
      0
      265
      3
      female
      0
      0.0
      382649
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      henry
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      18
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      30.500
      NaN
      49
      0
    
    
      276
      45.0
      NaN
      S
      7.7500
      Lindblom, Miss. Augusta Charlotta
      0
      277
      3
      female
      0
      0.0
      347073
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      lindblom
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      33
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      45.000
      49.0
      49
      0
    
    
      396
      31.0
      NaN
      S
      7.8542
      Olsson, Miss. Elina
      0
      397
      3
      female
      0
      0.0
      350407
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      olsson
      3
      0.0
      0
      0.0
      0
      600
      1
      Miss
      2
      0
      19
      no_cabin
      no_cabin
      0
      0
      7.8542
      -0.215323
      0.175726
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      31.000
      49.0
      49
      0
    
    
      573
      NaN
      NaN
      Q
      7.7500
      Kelly, Miss. Mary
      0
      574
      3
      female
      0
      1.0
      14312
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      kelly
      5
      0.0
      0
      0.0
      0
      406
      2
      Miss
      2
      0
      17
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      30.330
      NaN
      49
      0
    
    
      727
      NaN
      NaN
      Q
      7.7375
      Mannion, Miss. Margareth
      0
      728
      3
      female
      0
      1.0
      36866
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      mannion
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      24
      no_cabin
      no_cabin
      0
      0
      7.7375
      -0.221445
      0.137628
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      30.845
      NaN
      49
      0
    
    
      767
      30.5
      NaN
      Q
      7.7500
      Mangan, Miss. Mary
      0
      768
      3
      female
      0
      0.0
      364850
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      mangan
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      18
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      30.500
      49.0
      49
      0
    
    
      1097
      35.0
      NaN
      Q
      7.7500
      McGowan, Miss. Katherine
      0
      1098
      3
      female
      0
      NaN
      9232
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      mcgowan
      2
      0.0
      0
      0.0
      0
      -1
      1
      Miss
      2
      0
      24
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      35.000
      49.0
      49
      0
    
    
      1105
      38.0
      NaN
      S
      7.7750
      Andersson, Miss. Ida Augusta Margareta
      2
      1106
      3
      female
      4
      NaN
      347091
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      andersson
      11
      1.0
      1
      0.0
      0
      21
      2
      Miss
      2
      6
      38
      no_cabin
      no_cabin
      0
      0
      7.7750
      -0.219468
      0.149933
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      38.000
      49.0
      49
      0
    
    
      1204
      37.0
      NaN
      Q
      7.7500
      Carr, Miss. Jeannie
      0
      1205
      3
      female
      0
      NaN
      368364
      female_adult
      1
      1
      0.0
      0
      0.0
      0
      -1
      0
      carr
      2
      0.0
      0
      0.0
      0
      -1
      1
      Miss
      2
      0
      19
      no_cabin
      no_cabin
      0
      0
      7.7500
      -0.220785
      0.141736
      unknown
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      37.000
      49.0
      49
      0
    
  

10 rows × 139 columns



In [266]:

    
display(df[(df['Sex'] == 'female') & 
   (df['Fare_standard_score_with_Pclass'] <= -0.18) & 
   (df['Age_pred'] >= 38) & 
   (df['Pclass'] == 2) & 
   (df['Name_title_Miss'] == 1.0) &
   (df['TicketPrefix_unknown'] == 1.0) &
   (df['SurnameMembers_Simple'] == 0)
  ])

# poor old miss
df['Poor_Old_Miss_Second_Class'] = 0
df.loc[df[
        (df['Sex'] == 'female') & 
        (df['Fare_standard_score_with_Pclass'] <= -0.18) & 
        (df['Age_pred'] >= 38) & 
        (df['Pclass'] == 2) & 
        (df['Name_title_Miss'] == 1.0) &
        (df['TicketPrefix_unknown'] == 1.0) &
        (df['SurnameMembers_Simple'] == 0)
         ].index, 'Poor_Old_Miss_Second_Class'] = 1
       
if 'Poor_Old_Miss_Second_Class' not in numerical_columns:
    numerical_columns.append('Poor_Old_Miss_Second_Class')









    






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      SexAdult_male_adult
      ...
      CabinDeck_G
      CabinDeck_T
      CabinDeck_no_cabin
      CabinCategory_0
      CabinCategory_1
      CabinCategory_2
      CabinCategory_3
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
      Poor_Old_Miss_Third_Class
    
  
  
    
      357
      38.0
      NaN
      S
      13.0
      Funk, Miss. Annie Clemmer
      0
      358
      2
      female
      0
      0.0
      237671
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      funk
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      25
      no_cabin
      no_cabin
      0
      0
      13.0
      -0.186791
      0.134032
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      38.0
      49
      49
      0
      0
    
  

1 rows × 140 columns



In [267]:

    
display(df[
    (df['Sex'] == 'female') & 
    (df['Fare_standard_score_with_Pclass'] <= -0.18) & 
    (df['Age_pred'] >= 35) & 
    (df['Pclass'] == 1) & 
    (df['Name_title_Miss'] == 1.0) &
    (df['SurnameMembers_Simple'] == 0)
  ])

# poor old miss
df['Poor_Old_Miss_First_Class'] = 0
df.loc[df[
            (df['Sex'] == 'female') & 
            (df['Fare_standard_score_with_Pclass'] <= -0.18) & 
            (df['Age_pred'] >= 35) & 
            (df['Pclass'] == 1) & 
            (df['Name_title_Miss'] == 1.0) &
            (df['SurnameMembers_Simple'] == 0)
         ].index, 'Poor_Old_Miss_First_Class'] = 1
       
if 'Poor_Old_Miss_First_Class' not in numerical_columns:
    numerical_columns.append('Poor_Old_Miss_First_Class')









    






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      SexAdult_male_adult
      ...
      CabinDeck_T
      CabinDeck_no_cabin
      CabinCategory_0
      CabinCategory_1
      CabinCategory_2
      CabinCategory_3
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
      Poor_Old_Miss_Third_Class
      Poor_Old_Miss_Second_Class
    
  
  
    
      177
      50.0
      C49
      C
      28.7125
      Isham, Miss. Ann Elizabeth
      0
      178
      1
      female
      0
      0.0
      PC 17595
      female_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      isham
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      26
      starboard
      C
      1
      3
      28.7125
      -0.253930
      -0.060862
      PC
      1.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      50.0
      59
      59
      0
      0
      0
    
    
      1003
      36.0
      A29
      C
      31.6792
      Evans, Miss. Edith Corse
      0
      1004
      1
      female
      0
      NaN
      PC 17531
      female_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      evans
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      24
      starboard
      A
      1
      1
      31.6792
      -0.231528
      -0.024897
      PC
      1.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      36.0
      49
      49
      0
      0
      0
    
  

2 rows × 141 columns



In [268]:

    
df[(df['Sex'] == 'female') & (df['Fare'] <= 10) & (df['Age'] > 28) & (df['Name_title_Miss'] == 1.0)]

# poor old miss
df['Poor_Old_Miss'] = 0
df.loc[df[(df['Sex'] == 'female') 
         & (df['Fare'] <= 10) 
         & (df['Age_pred'] > 28) 
         & (df['Name_title_Miss'] == 1.0)].index, 'Poor_Old_Miss'] = 1
       
if 'Poor_Old_Miss' not in numerical_columns:
    numerical_columns.append('Poor_Old_Miss')



In [269]:

    
df[(df['Sex'] == 'female') & (df['Fare'] <= 10) & (df['Age'] > 26) & (df['Embarked'] == 'S') & (df['Name_title_Miss'] == 1.0)]

# poor Shouthampton old miss
df['Poor_Shouthampton_Old_Miss'] = 0
df.loc[df[(df['Sex'] == 'female') 
         & (df['Fare'] <= 10) 
         & (df['Age_pred'] > 26) 
         & (df['Embarked'] == 'S') 
         & (df['Name_title_Miss'] == 1.0)].index, 'Poor_Shouthampton_Old_Miss'] = 1
       
if 'Poor_Shouthampton_Old_Miss' not in numerical_columns:
    numerical_columns.append('Poor_Shouthampton_Old_Miss')



In [270]:

    
# feature selection
from sklearn.feature_selection import SelectKBest, f_classif

df_copied = df.copy()
df_copied['Name_titleCategory'] = df_copied['Name_titleCategory'].fillna(' ')
df_copied['Cabin'] = df_copied['Cabin'].fillna(' ')
df_copied['Age'] = df_copied['Age'].fillna(-300)
df_copied['AgeGroup'] = df_copied['AgeGroup'].fillna(-1.0)

train = df_copied[0:891].copy()
target = train["Survived"].values

selector = SelectKBest(f_classif, k=len(numerical_columns))
selector.fit(train[numerical_columns], target)
scores = -np.log10(selector.pvalues_)
indices = np.argsort(scores)[::-1]
print("Features importance :")
for f in range(len(scores)):
    print("%0.2f %s" % (scores[indices[f]],numerical_columns[indices[f]]))









    



Features importance :
nan Name_title_Dona
nan TicketPrefix_LP
nan TicketPrefix_AQ
nan TicketPrefix_STONOQ
70.61 Name_titleCategory_1
70.61 Name_title_Mr
68.85 Sex_female
68.85 Sex_male
64.90 SexAdult_male_adult
53.23 SexAdult_female_adult
25.12 Name_titleCategory_3
24.68 Name_title_Mrs
24.60 Pclass
24.17 Name_titleCategory_2
23.69 NameLength
23.35 Name_title_Miss
21.51 CabinCategory_0
21.51 CabinLocation_no_cabin
21.51 CabinDeck_no_cabin
17.23 CabinCount
17.00 TicketGroup_include_surviving_men
16.30 Ticket_surviving_men
14.21 Fare
13.54 TicketGroup_include_perishing_women
13.22 CabinLocation_starboard
13.04 SurnameGroup_include_surviving_men
12.71 Surname_surviving_men
10.83 Ticket_perishing_women
10.36 SurnameGroup_include_perishing_women
9.04 Surname_perishing_women
6.84 CabinCategory_2
6.84 CabinDeck_B
6.73 CabinLocation_port
6.36 Embarked_Category_0
6.36 Embarked_C
5.52 Embarked_Category_2
5.52 Embarked_S
5.21 CabinCategory_4
5.21 CabinDeck_D
4.98 TicketPrefix_PC
4.88 CabinCategory_5
4.88 CabinDeck_E
3.59 SexAdult_child
3.35 Fare_standard_score_with_Pclass
3.27 TicketPrefix_A
3.22 CabinDeck_C
3.22 CabinCategory_3
1.96 Name_title_Master
1.96 Name_titleCategory_4
1.95 Age
1.83 Parch
1.43 Age_pred
1.35 TicketPrefix_SOTONOQ
1.28 Name_titleCategory_6
1.28 Name_title_Rev
1.28 TicketMembers
1.26 TicketPrefix_FCC
1.25 AgeGroup_pred
1.20 TicketPrefix_WC
1.14 Name_title_Mlle
1.14 TicketPrefix_SWPP
1.14 Embarked_Category_3
1.14 Embarked_unknown
1.08 CabinCategory_6
1.08 CabinDeck_F
1.07 TicketId
0.81 Frugal_First_Class_Single_Man
0.77 Poor_Old_Miss_Third_Class
0.77 TicketPrefix_SOPP
0.72 Poor_Shouthampton_Old_Miss
0.69 TicketPrefix_SCAH Basle
0.69 Name_title_Mme
0.69 Name_title_Lady
0.69 Name_title_Sir
0.69 Name_title_Countess
0.69 TicketPrefix_SC
0.58 TicketPrefix_SOTONO
0.56 TicketPrefix_SOC
0.53 SibSp
0.50 Name_titleCategory_10
0.50 TicketPrefix_PP
0.50 Fare_per_ticket_member_standard_score_with_Pclass
0.37 CabinDeck_T
0.37 Name_title_Capt
0.37 TicketPrefix_CASOTON
0.37 TicketPrefix_SOP
0.37 CabinCategory_8
0.37 TicketPrefix_FC
0.37 TicketPrefix_SP
0.37 Poor_Old_Miss_Second_Class
0.37 Poor_Old_Miss_First_Class
0.37 TicketPrefix_SCA
0.37 TicketPrefix_Fa
0.37 Name_title_Don
0.37 Name_title_Jonkheer
0.37 TicketPrefix_SCOW
0.37 TicketPrefix_AS
0.35 SurnameMembers
0.30 CabinDeck_A
0.30 CabinCategory_1
0.25 TicketPrefix_CA
0.24 TicketPrefix_LINE
0.23 TicketPrefix_STONO
0.21 FamilySize
0.20 TicketPrefix_SCParis
0.20 CabinDeck_G
0.20 CabinLocation_unknown
0.20 CabinCategory_7
0.13 TicketPrefix_PPP
0.13 Name_titleCategory_9
0.13 Name_title_Col
0.13 TicketPrefix_SCAH
0.13 Name_title_Major
0.09 Name_titleCategory_5
0.09 TicketPrefix_SCPARIS
0.09 Name_title_Dr
0.08 AgeGroup
0.08 SurnameId
0.07 TicketPrefix_WEP
0.04 Embarked_Category_1
0.04 Embarked_Q
0.04 Poor_Old_Miss
0.03 TicketPrefix_C
0.03 Name_titleCategory_7
0.02 TicketPrefix_unknown






    



c:\develop\python27\lib\site-packages\sklearn\feature_selection\univariate_selection.py:113: UserWarning: Features [ 51 113 114 115] are constant.
  UserWarning)



In [271]:

    
# Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation

random_forest = RandomForestClassifier(n_estimators=3000, min_samples_split=4, class_weight={0:0.745, 1:0.255})
kfold = cross_validation.KFold(train.shape[0], n_folds=3, random_state=42)

scores = cross_validation.cross_val_score(random_forest, train[numerical_columns], target, cv=kfold)
print("Accuracy: %0.3f (+/- %0.2f) [%s]" % (scores.mean() * 100, scores.std() * 100, 'Random Forest Cross Validation'))

random_forest.fit(train[numerical_columns], target)
score = random_forest.score(train[numerical_columns], target)
print("Accuracy: %0.3f             [%s]" % (score * 100, 'Random Forest full test'))

importances = random_forest.feature_importances_
indices = np.argsort(importances)[::-1]
for f in range(len(numerical_columns)):
    print("%d. feature %d (%f) %s" % (f + 1, indices[f] + 1, importances[indices[f]] * 100, numerical_columns[indices[f]]))









    



Accuracy: 90.236 (+/- 1.20) [Random Forest Cross Validation]
Accuracy: 96.633             [Random Forest full test]
1. feature 23 (4.627239) Sex_male
2. feature 19 (4.499238) NameLength
3. feature 24 (4.458889) Sex_female
4. feature 5 (4.326391) Fare
5. feature 117 (4.148197) Age_pred
6. feature 6 (4.127816) Ticket_perishing_women
7. feature 7 (3.989957) TicketGroup_include_perishing_women
8. feature 21 (3.964469) Fare_standard_score_with_Pclass
9. feature 53 (3.858035) Name_titleCategory_1
10. feature 12 (3.774914) Surname_perishing_women
11. feature 36 (3.649514) Name_title_Mr
12. feature 22 (3.557337) Fare_per_ticket_member_standard_score_with_Pclass
13. feature 13 (3.501268) SurnameGroup_include_perishing_women
14. feature 29 (3.438745) SexAdult_male_adult
15. feature 2 (2.909823) Age
16. feature 30 (2.813614) SexAdult_female_adult
17. feature 1 (2.770948) Pclass
18. feature 8 (2.313915) Ticket_surviving_men
19. feature 9 (2.304947) TicketGroup_include_surviving_men
20. feature 11 (2.067989) TicketMembers
21. feature 18 (1.571138) FamilySize
22. feature 119 (1.489239) AgeGroup_pred
23. feature 17 (1.366975) SurnameMembers
24. feature 118 (1.293844) AgeGroup
25. feature 37 (1.279580) Name_title_Mrs
26. feature 54 (1.274140) Name_titleCategory_3
27. feature 14 (1.252589) Surname_surviving_men
28. feature 55 (1.193222) Name_titleCategory_2
29. feature 15 (1.174035) SurnameGroup_include_surviving_men
30. feature 38 (1.093298) Name_title_Miss
31. feature 75 (0.919906) CabinCategory_0
32. feature 62 (0.904816) CabinLocation_no_cabin
33. feature 3 (0.859511) SibSp
34. feature 66 (0.840639) CabinDeck_no_cabin
35. feature 20 (0.837275) CabinCount
36. feature 16 (0.797545) SurnameId
37. feature 10 (0.740958) TicketId
38. feature 63 (0.476063) CabinLocation_starboard
39. feature 4 (0.470066) Parch
40. feature 87 (0.464124) TicketPrefix_unknown
41. feature 103 (0.455635) TicketPrefix_SWPP
42. feature 25 (0.425454) Embarked_S
43. feature 32 (0.407843) Embarked_Category_2
44. feature 31 (0.404095) SexAdult_child
45. feature 26 (0.336420) Embarked_C
46. feature 77 (0.321972) CabinCategory_5
47. feature 125 (0.321428) Poor_Shouthampton_Old_Miss
48. feature 68 (0.310921) CabinDeck_E
49. feature 33 (0.303446) Embarked_Category_0
50. feature 39 (0.283680) Name_title_Master
51. feature 56 (0.282900) Name_titleCategory_4
52. feature 86 (0.281732) TicketPrefix_STONO
53. feature 64 (0.240633) CabinLocation_port
54. feature 85 (0.240431) TicketPrefix_PC
55. feature 124 (0.226805) Poor_Old_Miss
56. feature 34 (0.223911) Embarked_Category_1
57. feature 27 (0.222036) Embarked_Q
58. feature 120 (0.216239) Frugal_First_Class_Single_Man
59. feature 122 (0.172032) Poor_Old_Miss_Second_Class
60. feature 121 (0.162556) Poor_Old_Miss_Third_Class
61. feature 67 (0.160834) CabinDeck_C
62. feature 76 (0.153976) CabinCategory_3
63. feature 72 (0.153036) CabinDeck_B
64. feature 81 (0.152690) CabinCategory_2
65. feature 70 (0.146989) CabinDeck_D
66. feature 97 (0.138446) TicketPrefix_C
67. feature 79 (0.138029) CabinCategory_4
68. feature 123 (0.135876) Poor_Old_Miss_First_Class
69. feature 110 (0.120063) TicketPrefix_SOPP
70. feature 95 (0.115898) TicketPrefix_SOTONOQ
71. feature 58 (0.109808) Name_titleCategory_6
72. feature 41 (0.098383) Name_title_Rev
73. feature 89 (0.097664) TicketPrefix_CA
74. feature 84 (0.088309) TicketPrefix_A
75. feature 94 (0.084395) TicketPrefix_WC
76. feature 60 (0.082761) Name_titleCategory_7
77. feature 98 (0.078718) TicketPrefix_SCPARIS
78. feature 80 (0.077343) CabinCategory_1
79. feature 71 (0.066849) CabinDeck_A
80. feature 59 (0.066597) Name_titleCategory_5
81. feature 42 (0.061349) Name_title_Dr
82. feature 73 (0.045530) CabinDeck_F
83. feature 101 (0.042656) TicketPrefix_LINE
84. feature 44 (0.042127) Name_title_Major
85. feature 82 (0.040519) CabinCategory_6
86. feature 48 (0.039414) Name_title_Col
87. feature 69 (0.031023) CabinDeck_G
88. feature 78 (0.029813) CabinCategory_7
89. feature 102 (0.022288) TicketPrefix_FCC
90. feature 49 (0.019940) Name_title_Capt
91. feature 96 (0.015541) TicketPrefix_WEP
92. feature 57 (0.013597) Name_titleCategory_9
93. feature 93 (0.011283) TicketPrefix_SOC
94. feature 46 (0.008247) Name_title_Sir
95. feature 40 (0.007050) Name_title_Don
96. feature 106 (0.007004) TicketPrefix_SC
97. feature 61 (0.006852) Name_titleCategory_10
98. feature 65 (0.006806) CabinLocation_unknown
99. feature 90 (0.005564) TicketPrefix_SCParis
100. feature 51 (0.004654) Name_title_Jonkheer
101. feature 47 (0.004555) Name_title_Mlle
102. feature 88 (0.004119) TicketPrefix_PP
103. feature 74 (0.002629) CabinDeck_T
104. feature 35 (0.002126) Embarked_Category_3
105. feature 45 (0.001901) Name_title_Lady
106. feature 83 (0.001782) CabinCategory_8
107. feature 111 (0.001704) TicketPrefix_FC
108. feature 28 (0.001523) Embarked_unknown
109. feature 92 (0.001268) TicketPrefix_SP
110. feature 112 (0.001264) TicketPrefix_SOTONO
111. feature 50 (0.001255) Name_title_Countess
112. feature 109 (0.001002) TicketPrefix_SCAH Basle
113. feature 99 (0.000887) TicketPrefix_SOP
114. feature 105 (0.000826) TicketPrefix_PPP
115. feature 107 (0.000650) TicketPrefix_SCAH
116. feature 113 (0.000617) TicketPrefix_CASOTON
117. feature 104 (0.000520) TicketPrefix_SCOW
118. feature 91 (0.000462) TicketPrefix_SCA
119. feature 108 (0.000442) TicketPrefix_AS
120. feature 100 (0.000170) TicketPrefix_Fa
121. feature 114 (0.000000) TicketPrefix_STONOQ
122. feature 115 (0.000000) TicketPrefix_AQ
123. feature 116 (0.000000) TicketPrefix_LP
124. feature 52 (0.000000) Name_title_Dona
125. feature 43 (0.000000) Name_title_Mme



In [272]:

    
import matplotlib.pyplot as plt

from collections import OrderedDict
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

ensemble_clfs = [
    ("RandomForestClassifier",
        RandomForestClassifier(
                               n_estimators=3000,
                               n_jobs=8,
                               class_weight={0:0.745, 1:0.255},
                               min_samples_split=4,
                               random_state=42)),
]

kfold = cross_validation.KFold(train.shape[0], n_folds=3, random_state=42)



In [273]:

    
# error_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)
# for label, clf in ensemble_clfs:
#     print("Classifier : %s" % label)
#     for i in range(2, 10):
#         clf.set_params(min_samples_split=i)
#         clf.fit(train[numerical_columns], target)
#         scores = cross_validation.cross_val_score(clf, train[numerical_columns], target, cv=kfold)
#         error_rate[label].append((i, scores.mean() * 100))
#         print("%d estimator" % i)
        
# for label, clf_err in error_rate.items():
#     xs, ys = zip(*clf_err)
#     plt.plot(xs, ys, label=label)
    
# plt.xlim(2, 10)
# plt.xlabel("min_samples_split")
# plt.ylabel("score")
# plt.legend(loc="upper right")
# plt.show()



In [274]:

    
# error_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)
# for label, clf in ensemble_clfs:
#     print("Classifier : %s" % label)
#     for i in range(1, 100, 10):
#         clf.set_params(max_depth=i)
#         clf.fit(train[numerical_columns], target)
#         scores = cross_validation.cross_val_score(clf, train[numerical_columns], target, cv=kfold)
# #         error_rate[label].append((i, scores.mean() * 100))
#         print("max_depth : %d" % i)
        
# for label, clf_err in error_rate.items():
#     xs, ys = zip(*clf_err)
#     plt.plot(xs, ys, label=label)
    
# plt.xlim(0, 100)
# plt.xlabel("max_depth")
# plt.ylabel("score")
# plt.legend(loc="upper right")
# plt.show()



In [275]:

    
features = [
            'Sex_female','Sex_male',
    
            'Age_pred',
    
            'SexAdult_male_adult','SexAdult_female_adult', 'SexAdult_child',
    
            'Name_titleCategory',
#             'Name_titleCategory_1',
#             'Name_titleCategory_2',
#             'Name_titleCategory_3',
#             'Name_titleCategory_4',
#             'Name_titleCategory_5',
#             'Name_titleCategory_6',
#             'Name_titleCategory_7',
#             'Name_titleCategory_9',
#             'Name_titleCategory_10',
#             'Name_title_Mr', 'Name_title_Mrs', 'Name_title_Miss', 'Name_title_Master', 
#             'Name_title_Don', 'Name_title_Rev', 'Name_title_Dr', 'Name_title_Mme', 
#             'Name_title_Major', 'Name_title_Lady', 'Name_title_Sir', 'Name_title_Mlle', 'Name_title_Col', 
#             'Name_title_Capt', 'Name_title_Countess', 'Name_title_Jonkheer', 

            'Pclass', 
            
            'TicketId',
    
            'NameLength',

            'CabinLocation_no_cabin', 'CabinLocation_starboard', 'CabinLocation_port', 
            'CabinCategory',
#             'CabinCategory_0',
#             'CabinCategory_1',
#             'CabinCategory_2',
#             'CabinCategory_3',
#             'CabinCategory_4',
#             'CabinCategory_5',
#             'CabinCategory_6',
#             'CabinCategory_7',
#             'CabinCategory_8',
#             'CabinDeck_C', 'CabinDeck_E', 'CabinDeck_G', 'CabinDeck_D', 'CabinDeck_A', 'CabinDeck_B', 'CabinDeck_F', 'CabinDeck_T','CabinDeck_no_cabin', 

            'SibSp','Parch',
    
            'Fare',
#             'Fare_per_ticket_member',
#             'Fare_standard_score_with_Pclass',
#             'Fare_per_ticket_member_standard_score_with_Pclass',
    
            'Embarked_Category',
#             'Embarked_S','Embarked_Q','Embarked_C','Embarked_unknown',
    
            'SurnameMembers_Simple','SurnameGroup_include_perishing_women','SurnameGroup_include_surviving_men',
    
            'TicketMembers_Simple', 'TicketGroup_include_perishing_women','TicketGroup_include_surviving_men',
    
            'FamilySize', 

#             'Frugal_First_Class_Single_Man',
#             'Poor_Old_Miss',
#             'Poor_Shouthampton_Old_Miss',
#             'Poor_Old_Miss_Third_Class',
#             'Poor_Old_Miss_Second_Class',
#             'Poor_Old_Miss_First_Class',
    
#             'TicketPrefix_SOPP', 'TicketPrefix_WC',
#             'TicketPrefix_unknown', 
#             'TicketPrefix_SCA','TicketPrefix_SP','TicketPrefix_SOP','TicketPrefix_Fa','TicketPrefix_SCOW','TicketPrefix_AS',
#             'TicketPrefix_FC','TicketPrefix_SOTONO','TicketPrefix_CASOTON','TicketPrefix_SWPP','TicketPrefix_SC','TicketPrefix_SCAH Basle',
    
#             'CabinCount',
           ]



In [276]:

    
# analyze failed.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(train, target, test_size=0.2, random_state=42)
random_forest = RandomForestClassifier(n_estimators=3000, min_samples_split=4, class_weight={0:0.745, 1:0.255})
kfold = cross_validation.KFold(X_train.shape[0], n_folds=3, random_state=42)

scores = cross_validation.cross_val_score(random_forest, X_train[features], y_train, cv=kfold)
print("Accuracy: %0.3f (+/- %0.2f) [%s]" % (scores.mean() * 100, scores.std() * 100, 'Random Forest Cross Validation'))

random_forest.fit(X_train[features], y_train)
score = random_forest.score(X_test[features], y_test)
print("Accuracy: %0.3f             [%s]" % (score * 100, 'Random Forest full test'))
pred_test = random_forest.predict(X_test[features])

importances = random_forest.feature_importances_
indices = np.argsort(importances)[::-1]
for f in range(len(features)):
    print("%d. feature %d (%f) %s" % (f + 1, indices[f] + 1, importances[indices[f]] * 100, features[indices[f]]))









    



Accuracy: 89.187 (+/- 1.19) [Random Forest Cross Validation]
Accuracy: 91.620             [Random Forest full test]
1. feature 17 (9.272662) Fare
2. feature 3 (8.709954) Age_pred
3. feature 10 (7.882402) NameLength
4. feature 23 (7.649029) TicketGroup_include_perishing_women
5. feature 7 (7.161242) Name_titleCategory
6. feature 20 (7.011301) SurnameGroup_include_perishing_women
7. feature 2 (6.655039) Sex_male
8. feature 8 (6.208368) Pclass
9. feature 1 (5.798096) Sex_female
10. feature 4 (4.838878) SexAdult_male_adult
11. feature 24 (4.692014) TicketGroup_include_surviving_men
12. feature 5 (3.986412) SexAdult_female_adult
13. feature 22 (3.011894) TicketMembers_Simple
14. feature 25 (2.667078) FamilySize
15. feature 21 (2.497202) SurnameGroup_include_surviving_men
16. feature 14 (2.357309) CabinCategory
17. feature 19 (1.933670) SurnameMembers_Simple
18. feature 18 (1.596739) Embarked_Category
19. feature 9 (1.193332) TicketId
20. feature 11 (1.130423) CabinLocation_no_cabin
21. feature 15 (1.121023) SibSp
22. feature 12 (0.764313) CabinLocation_starboard
23. feature 6 (0.743239) SexAdult_child
24. feature 16 (0.717791) Parch
25. feature 13 (0.400588) CabinLocation_port



In [277]:

    
pd.set_option("display.max_columns",101)
X_test_reseted = X_test.reset_index()
X_test_reseted['Survived_'] = y_test
X_test_reseted['Prediction'] = pred_test
X_test_reseted['pred_result'] = pred_test == y_test



In [278]:

    
display(X_test_reseted[(X_test_reseted['Survived'] == 1.0) & (X_test_reseted['pred_result'] == False)])









    






  
    
      
      index
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      ...
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
      Poor_Old_Miss_Third_Class
      Poor_Old_Miss_Second_Class
      Poor_Old_Miss_First_Class
      Poor_Old_Miss
      Poor_Shouthampton_Old_Miss
      Survived_
      Prediction
      pred_result
    
  
  
    
      21
      447
      34.0
      
      S
      26.5500
      Seward, Mr. Frederic Kimber
      0
      448
      1
      male
      0
      1.0
      113794
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      seward
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      27
      no_cabin
      no_cabin
      0
      0
      26.5500
      -0.271769
      -0.089502
      unknown
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      34.0
      49
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      27
      673
      31.0
      
      S
      13.0000
      Wilhelms, Mr. Charles
      0
      674
      2
      male
      0
      1.0
      244270
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      wilhelms
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      21
      no_cabin
      no_cabin
      0
      0
      13.0000
      -0.186791
      0.134032
      unknown
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      31.0
      49
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      34
      204
      18.0
      
      S
      8.0500
      Cohen, Mr. Gurshon "Gus"
      0
      205
      3
      male
      0
      1.0
      A/5 3540
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      cohen
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      24
      no_cabin
      no_cabin
      0
      0
      8.0500
      -0.205253
      0.238394
      A
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      18.0
      18
      18
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      36
      23
      28.0
      A6
      S
      35.5000
      Sloper, Mr. William Thompson
      0
      24
      1
      male
      0
      1.0
      113788
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      sloper
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      28
      port
      A
      1
      1
      35.5000
      -0.205583
      0.016756
      unknown
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      28.0
      30
      30
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      72
      889
      26.0
      C148
      C
      30.0000
      Behr, Mr. Karl Howell
      0
      890
      1
      male
      0
      1.0
      111369
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      behr
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      21
      port
      C
      1
      3
      30.0000
      -0.243936
      -0.044818
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      26.0
      30
      30
      1
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      97
      338
      45.0
      
      S
      8.0500
      Dahl, Mr. Karl Edwart
      0
      339
      3
      male
      0
      1.0
      7598
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      dahl
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      21
      no_cabin
      no_cabin
      0
      0
      8.0500
      -0.205253
      0.238394
      unknown
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      45.0
      49
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      108
      286
      30.0
      
      S
      9.5000
      de Mulder, Mr. Theodore
      0
      287
      3
      male
      0
      1.0
      345774
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      de mulder
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      23
      no_cabin
      no_cabin
      0
      0
      9.5000
      -0.137514
      0.659950
      unknown
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      30.0
      30
      30
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      109
      209
      40.0
      A31
      C
      31.0000
      Blank, Mr. Henry
      0
      210
      1
      male
      0
      1.0
      112277
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      blank
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      16
      starboard
      A
      1
      1
      31.0000
      -0.236466
      -0.032824
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      40.0
      49
      49
      1
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      113
      512
      36.0
      E25
      S
      26.2875
      McGough, Mr. James Robert
      0
      513
      1
      male
      0
      1.0
      PC 17473
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      mcgough
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      25
      starboard
      E
      1
      5
      26.2875
      -0.274033
      -0.093136
      PC
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      36.0
      49
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      125
      604
      35.0
      
      C
      26.5500
      Homer, Mr. Harry ("Mr E Haven")
      0
      605
      1
      male
      0
      1.0
      111426
      male_adult
      0
      1
      0.0
      0
      0.0
      0
      -1
      0
      homer
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mr
      1
      0
      31
      no_cabin
      no_cabin
      0
      0
      26.5500
      -0.271769
      -0.089502
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      35.0
      49
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      161
      507
      -300.0
      
      S
      26.5500
      Bradley, Mr. George ("George Arthur Brayton")
      0
      508
      1
      male
      0
      1.0
      111427
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      bradley
      2
      0.0
      0
      0.0
      0
      -1
      1
      Mr
      1
      0
      45
      no_cabin
      no_cabin
      0
      0
      26.5500
      -0.271769
      -0.089502
      unknown
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      45.0
      -1
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
    
      172
      572
      36.0
      E25
      S
      26.3875
      Flynn, Mr. John Irwin ("Irving")
      0
      573
      1
      male
      0
      1.0
      PC 17474
      male_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      flynn
      3
      0.0
      0
      0.0
      0
      257
      1
      Mr
      1
      0
      32
      starboard
      E
      1
      5
      26.3875
      -0.273168
      -0.091747
      PC
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      ...
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      36.0
      49
      49
      0
      0
      0
      0
      0
      0
      1.0
      0.0
      False
    
  

12 rows × 148 columns



In [279]:

    
display(X_test_reseted[(X_test_reseted['Survived'] == 0.0) & (X_test_reseted['pred_result'] == False)])









    






  
    
      
      index
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      ...
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
      Poor_Old_Miss_Third_Class
      Poor_Old_Miss_Second_Class
      Poor_Old_Miss_First_Class
      Poor_Old_Miss
      Poor_Shouthampton_Old_Miss
      Survived_
      Prediction
      pred_result
    
  
  
    
      33
      235
      -300.0
      
      S
      7.55
      Harknett, Miss. Alice Phoebe
      0
      236
      3
      female
      0
      0.0
      W./C. 6609
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      harknett
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      28
      no_cabin
      no_cabin
      0
      0
      7.55
      -0.231477
      0.075199
      WC
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      19.31
      -1
      30
      0
      0
      0
      0
      0
      0
      0.0
      1.0
      False
    
    
      65
      772
      57.0
      E77
      S
      10.50
      Mack, Mrs. (Mary)
      0
      773
      2
      female
      0
      0.0
      S.O./P.P. 3
      female_adult
      2
      2
      0.0
      0
      0.0
      0
      -1
      1
      mack
      1
      0.0
      0
      0.0
      0
      -1
      0
      Mrs
      3
      0
      17
      starboard
      E
      1
      5
      5.25
      -0.268509
      -0.797750
      SOPP
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      ...
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      57.00
      59
      59
      0
      0
      0
      0
      0
      0
      0.0
      1.0
      False
    
    
      78
      357
      38.0
      
      S
      13.00
      Funk, Miss. Annie Clemmer
      0
      358
      2
      female
      0
      0.0
      237671
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      funk
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      25
      no_cabin
      no_cabin
      0
      0
      13.00
      -0.186791
      0.134032
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      38.00
      49
      49
      0
      0
      1
      0
      0
      0
      0.0
      1.0
      False
    
  

3 rows × 148 columns



In [280]:

    
# select specidic features
random_forest = RandomForestClassifier(n_estimators=3000, min_samples_split=4, class_weight={0:0.745, 1:0.255})
kfold = cross_validation.KFold(train.shape[0], n_folds=3, random_state=42)

scores = cross_validation.cross_val_score(random_forest, train[features], target, cv=kfold)
print("Accuracy: %0.3f (+/- %0.2f) [%s]" % (scores.mean() * 100, scores.std() * 100, 'Random Forest Cross Validation'))

random_forest.fit(train[features], target)
score = random_forest.score(train[features], target)
print("Accuracy: %0.3f             [%s]" % (score * 100, 'Random Forest full test'))

importances = random_forest.feature_importances_
indices = np.argsort(importances)[::-1]
for f in range(len(features)):
    print("%d. feature %d (%f) %s" % (f + 1, indices[f] + 1, importances[indices[f]] * 100, features[indices[f]]))









    



Accuracy: 89.562 (+/- 1.10) [Random Forest Cross Validation]
Accuracy: 96.296             [Random Forest full test]
1. feature 17 (9.195192) Fare
2. feature 3 (8.895260) Age_pred
3. feature 23 (8.440197) TicketGroup_include_perishing_women
4. feature 10 (8.294630) NameLength
5. feature 20 (7.377621) SurnameGroup_include_perishing_women
6. feature 2 (6.882887) Sex_male
7. feature 7 (6.858682) Name_titleCategory
8. feature 1 (6.099656) Sex_female
9. feature 8 (5.239385) Pclass
10. feature 4 (4.682957) SexAdult_male_adult
11. feature 24 (4.608597) TicketGroup_include_surviving_men
12. feature 5 (3.293446) SexAdult_female_adult
13. feature 22 (2.915148) TicketMembers_Simple
14. feature 25 (2.634912) FamilySize
15. feature 14 (2.558125) CabinCategory
16. feature 21 (2.510422) SurnameGroup_include_surviving_men
17. feature 19 (1.855995) SurnameMembers_Simple
18. feature 18 (1.606718) Embarked_Category
19. feature 11 (1.464326) CabinLocation_no_cabin
20. feature 9 (1.163512) TicketId
21. feature 15 (1.143969) SibSp
22. feature 6 (0.658699) SexAdult_child
23. feature 12 (0.615585) CabinLocation_starboard
24. feature 16 (0.602184) Parch
25. feature 13 (0.401896) CabinLocation_port



In [281]:

    
random_forest = RandomForestClassifier(n_estimators=3000, min_samples_split=4, class_weight={0:0.745, 1:0.255})
test = df_copied[891:].copy()
random_forest.fit(train[features], target)
predictions = random_forest.predict(test[features])



In [282]:

    
PassengerId = np.array(test["PassengerId"]).astype(int)
submit_df = pd.DataFrame(predictions, PassengerId, columns = ['Survived']).astype(int)
submit_df.to_csv('titanic.csv', index_label=['PassengerId'])



In [283]:

    
wnot_subimt_df = pd.read_csv("wnot_submit.csv")
wnot_subimt_df = wnot_subimt_df.reset_index().drop('index', axis=1)
wnot_subimt_df = wnot_subimt_df.set_index('PassengerId')
diff = submit_df.copy()
diff['Survived_wnot'] = wnot_subimt_df['Survived']
diff['pred_result'] = diff['Survived_wnot'] == diff['Survived']



In [284]:

    
display(df_copied.loc[diff[(diff['pred_result'] == False)].index - 1, :])
print(diff[(diff['pred_result'] == False)])









    






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      SexAdult
      Embarked_Category
      TicketMembers
      Ticket_perishing_women
      TicketGroup_include_perishing_women
      Ticket_surviving_men
      TicketGroup_include_surviving_men
      TicketId
      TicketMembers_Simple
      surname
      SurnameMembers
      Surname_perishing_women
      SurnameGroup_include_perishing_women
      Surname_surviving_men
      SurnameGroup_include_surviving_men
      SurnameId
      SurnameMembers_Simple
      Name_title
      Name_titleCategory
      FamilySize
      NameLength
      CabinLocation
      CabinDeck
      CabinCount
      CabinCategory
      Fare_per_ticket_member
      Fare_standard_score_with_Pclass
      Fare_per_ticket_member_standard_score_with_Pclass
      TicketPrefix
      Sex_female
      Sex_male
      Embarked_C
      Embarked_Q
      Embarked_S
      Embarked_unknown
      SexAdult_child
      SexAdult_female_adult
      SexAdult_male_adult
      ...
      CabinCategory_1
      CabinCategory_2
      CabinCategory_3
      CabinCategory_4
      CabinCategory_5
      CabinCategory_6
      CabinCategory_7
      CabinCategory_8
      TicketPrefix_A
      TicketPrefix_AQ
      TicketPrefix_AS
      TicketPrefix_C
      TicketPrefix_CA
      TicketPrefix_CASOTON
      TicketPrefix_FC
      TicketPrefix_FCC
      TicketPrefix_Fa
      TicketPrefix_LINE
      TicketPrefix_LP
      TicketPrefix_PC
      TicketPrefix_PP
      TicketPrefix_PPP
      TicketPrefix_SC
      TicketPrefix_SCA
      TicketPrefix_SCAH
      TicketPrefix_SCAH Basle
      TicketPrefix_SCOW
      TicketPrefix_SCPARIS
      TicketPrefix_SCParis
      TicketPrefix_SOC
      TicketPrefix_SOP
      TicketPrefix_SOPP
      TicketPrefix_SOTONO
      TicketPrefix_SOTONOQ
      TicketPrefix_SP
      TicketPrefix_STONO
      TicketPrefix_STONOQ
      TicketPrefix_SWPP
      TicketPrefix_WC
      TicketPrefix_WEP
      TicketPrefix_unknown
      Age_pred
      AgeGroup
      AgeGroup_pred
      Frugal_First_Class_Single_Man
      Poor_Old_Miss_Third_Class
      Poor_Old_Miss_Second_Class
      Poor_Old_Miss_First_Class
      Poor_Old_Miss
      Poor_Shouthampton_Old_Miss
    
  
  
    
      1088
      18.0
      
      S
      7.7750
      Nilsson, Miss. Berta Olivia
      0
      1089
      3
      female
      0
      NaN
      347066
      female_adult
      2
      1
      0.0
      0
      0.0
      0
      -1
      0
      nilsson
      3
      0.0
      0
      0.0
      0
      577
      1
      Miss
      2
      0
      27
      no_cabin
      no_cabin
      0
      0
      7.775000
      -0.219468
      0.149933
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      18.000000
      18
      18
      0
      0
      0
      0
      0
      0
    
    
      1258
      22.0
      
      S
      39.6875
      Riihivouri, Miss. Susanna Juhantytar Sanni""
      0
      1259
      3
      female
      0
      NaN
      3101295
      female_adult
      2
      7
      1.0
      1
      0.0
      0
      335
      2
      riihivouri
      1
      0.0
      0
      0.0
      0
      -1
      0
      Miss
      2
      0
      44
      no_cabin
      no_cabin
      0
      0
      5.669643
      0.447599
      -0.653553
      unknown
      1.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      1.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      22.000000
      30
      30
      0
      0
      0
      0
      0
      0
    
    
      1308
      -300.0
      
      C
      22.3583
      Peter, Master. Michael J
      1
      1309
      3
      male
      1
      NaN
      2668
      male_adult
      0
      3
      0.0
      0
      0.0
      0
      262
      1
      peter
      3
      0.0
      0
      0.0
      0
      639
      1
      Master
      4
      2
      24
      no_cabin
      no_cabin
      0
      0
      7.452767
      0.212708
      0.042212
      unknown
      0.0
      1.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      6.888267
      -1
      11
      0
      0
      0
      0
      0
      0
    
  

3 rows × 144 columns







    



      Survived  Survived_wnot pred_result
1089         1              0       False
1259         1              0       False
1309         0              1       False



In [ ]:

	Age	Cabin	Embarked	Fare	Name	PassengerId	Pclass	Sex	SibSp	Survived	Ticket
0	22.0	NaN	S	7.2500	Braund, Mr. Owen Harris	1	3	male	1	0.0	A/5 21171
1	38.0	C85	C	71.2833	Cumings, Mrs. John Bradley (Florence Briggs Th...	2	1	female	1	1.0	PC 17599
2	26.0	NaN	S	7.9250	Heikkinen, Miss. Laina	3	3	female	0	1.0	STON/O2. 3101282
3	35.0	C123	S	53.1000	Futrelle, Mrs. Jacques Heath (Lily May Peel)	4	1	female	1	1.0	113803
4	35.0	NaN	S	8.0500	Allen, Mr. William Henry	5	3	male	0	0.0	373450

	Age	Cabin	Embarked	Fare	Name	PassengerId	Pclass	Sex	Survived	Ticket	SexAdult	TicketMembers	TicketId	surname	SurnameMembers	SurnameId	Name_title	Name_titleCategory	NameLength	CabinLocation	CabinDeck	CabinCount	CabinCategory	Fare_per_ticket_member	Fare_standard_score_with_Pclass	Fare_per_ticket_member_standard_score_with_Pclass	TicketPrefix	Sex_male	Embarked_C	SexAdult_male_adult	...	CabinCategory_1	CabinCategory_2	CabinCategory_3	TicketPrefix_unknown	Age_pred	AgeGroup	AgeGroup_pred	Frugal_First_Class_Single_Man
209	40.0	A31	C	31.0000	Blank, Mr. Henry	210	1	male	1.0	112277	male_adult	1	-1	blank	1	-1	Mr	1	16	starboard	A	1	1	31.0000	-0.236466	-0.032824	unknown	1.0	1.0	1.0	...	1.0	0.0	0.0	1.0	40.00	49.0	49	1
452	30.0	C111	C	27.7500	Foreman, Mr. Benjamin Laventall	453	1	male	0.0	113051	male_adult	1	-1	foreman	1	-1	Mr	1	31	starboard	C	1	3	27.7500	-0.261698	-0.073333	unknown	1.0	1.0	1.0	...	0.0	0.0	1.0	1.0	30.00	30.0	30	1
487	58.0	B37	C	29.7000	Kent, Mr. Edward Austin	488	1	male	0.0	11771	male_adult	1	-1	kent	1	-1	Mr	1	23	starboard	B	1	2	29.7000	-0.246226	-0.048494	unknown	1.0	1.0	1.0	...	0.0	1.0	0.0	1.0	58.00	59.0	59	1
632	32.0	B50	C	30.5000	Stahelin-Maeglin, Dr. Max	633	1	male	1.0	13214	male_adult	1	-1	stahelin-maeglin	1	-1	Dr	5	25	port	B	1	2	30.5000	-0.240170	-0.038772	unknown	1.0	1.0	1.0	...	0.0	1.0	0.0	1.0	32.00	49.0	49	1
839	NaN	C47	C	29.7000	Marechal, Mr. Pierre	840	1	male	1.0	11774	male_adult	1	-1	marechal	1	-1	Mr	1	20	starboard	C	1	3	29.7000	-0.246226	-0.048494	unknown	1.0	1.0	1.0	...	0.0	0.0	1.0	1.0	42.58	NaN	49	1
889	26.0	C148	C	30.0000	Behr, Mr. Karl Howell	890	1	male	1.0	111369	male_adult	1	-1	behr	1	-1	Mr	1	21	port	C	1	3	30.0000	-0.243936	-0.044818	unknown	1.0	1.0	1.0	...	0.0	0.0	1.0	1.0	26.00	30.0	30	1
959	31.0	C53	C	28.5375	Tucker, Mr. Gilbert Milligan Jr	960	1	male	NaN	2543	male_adult	1	-1	tucker	1	-1	Mr	1	31	starboard	C	1	3	28.5375	-0.255323	-0.063098	unknown	1.0	1.0	1.0	...	0.0	0.0	1.0	1.0	31.00	49.0	49	1
1022	53.0	C51	C	28.5000	Gracie, Col. Archibald IV	1023	1	male	NaN	113780	male_adult	1	-1	gracie	1	-1	Col	7	25	starboard	C	1	3	28.5000	-0.255622	-0.063579	unknown	1.0	1.0	1.0	...	0.0	0.0	1.0	1.0	53.00	59.0	59	1

	Age	Cabin	Embarked	Fare	Name	Parch	PassengerId	Pclass	Sex	SibSp	Survived	Ticket	SexAdult	Embarked_Category	TicketMembers	TicketId	surname	SurnameMembers	Surname_perishing_women	SurnameGroup_include_perishing_women	SurnameId	SurnameMembers_Simple	Name_title	Name_titleCategory	FamilySize	NameLength	CabinLocation	CabinDeck	Fare_per_ticket_member	Fare_standard_score_with_Pclass	Fare_per_ticket_member_standard_score_with_Pclass	TicketPrefix	Sex_female	Embarked_Q	Embarked_S	SexAdult_female_adult	...	CabinDeck_no_cabin	CabinCategory_0	TicketPrefix_unknown	Age_pred	AgeGroup	AgeGroup_pred
32	NaN	NaN	Q	7.7500	Glynn, Miss. Mary Agatha	0	33	3	female	0	1.0	335677	female_adult	1	1	-1	glynn	1	0.0	0	-1	0	Miss	2	0	24	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	35.000	NaN	49
264	NaN	NaN	Q	7.7500	Henry, Miss. Delia	0	265	3	female	0	0.0	382649	female_adult	1	1	-1	henry	1	0.0	0	-1	0	Miss	2	0	18	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	30.500	NaN	49
276	45.0	NaN	S	7.7500	Lindblom, Miss. Augusta Charlotta	0	277	3	female	0	0.0	347073	female_adult	2	1	-1	lindblom	1	0.0	0	-1	0	Miss	2	0	33	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	0.0	1.0	1.0	...	1.0	1.0	1.0	45.000	49.0	49
396	31.0	NaN	S	7.8542	Olsson, Miss. Elina	0	397	3	female	0	0.0	350407	female_adult	2	1	-1	olsson	3	0.0	0	600	1	Miss	2	0	19	no_cabin	no_cabin	7.8542	-0.215323	0.175726	unknown	1.0	0.0	1.0	1.0	...	1.0	1.0	1.0	31.000	49.0	49
573	NaN	NaN	Q	7.7500	Kelly, Miss. Mary	0	574	3	female	0	1.0	14312	female_adult	1	1	-1	kelly	5	0.0	0	406	2	Miss	2	0	17	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	30.330	NaN	49
727	NaN	NaN	Q	7.7375	Mannion, Miss. Margareth	0	728	3	female	0	1.0	36866	female_adult	1	1	-1	mannion	1	0.0	0	-1	0	Miss	2	0	24	no_cabin	no_cabin	7.7375	-0.221445	0.137628	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	30.845	NaN	49
767	30.5	NaN	Q	7.7500	Mangan, Miss. Mary	0	768	3	female	0	0.0	364850	female_adult	1	1	-1	mangan	1	0.0	0	-1	0	Miss	2	0	18	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	30.500	49.0	49
1097	35.0	NaN	Q	7.7500	McGowan, Miss. Katherine	0	1098	3	female	0	NaN	9232	female_adult	1	1	-1	mcgowan	2	0.0	0	-1	1	Miss	2	0	24	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	35.000	49.0	49
1105	38.0	NaN	S	7.7750	Andersson, Miss. Ida Augusta Margareta	2	1106	3	female	4	NaN	347091	female_adult	2	1	-1	andersson	11	1.0	1	21	2	Miss	2	6	38	no_cabin	no_cabin	7.7750	-0.219468	0.149933	unknown	1.0	0.0	1.0	1.0	...	1.0	1.0	1.0	38.000	49.0	49
1204	37.0	NaN	Q	7.7500	Carr, Miss. Jeannie	0	1205	3	female	0	NaN	368364	female_adult	1	1	-1	carr	2	0.0	0	-1	1	Miss	2	0	19	no_cabin	no_cabin	7.7500	-0.220785	0.141736	unknown	1.0	1.0	0.0	1.0	...	1.0	1.0	1.0	37.000	49.0	49

	Age	Cabin	Embarked	Fare	Name	Parch	PassengerId	Pclass	Sex	SibSp	Survived	Ticket	SexAdult	Embarked_Category	TicketMembers	Ticket_perishing_women	TicketGroup_include_perishing_women	Ticket_surviving_men	TicketGroup_include_surviving_men	TicketId	TicketMembers_Simple	surname	SurnameMembers	Surname_perishing_women	SurnameGroup_include_perishing_women	Surname_surviving_men	SurnameGroup_include_surviving_men	SurnameId	SurnameMembers_Simple	Name_title	Name_titleCategory	FamilySize	NameLength	CabinLocation	CabinDeck	CabinCount	CabinCategory	Fare_per_ticket_member	Fare_standard_score_with_Pclass	Fare_per_ticket_member_standard_score_with_Pclass	TicketPrefix	Sex_female	Sex_male	Embarked_C	Embarked_Q	Embarked_S	Embarked_unknown	SexAdult_child	SexAdult_female_adult	SexAdult_male_adult	...	CabinDeck_T	CabinDeck_no_cabin	CabinCategory_0	CabinCategory_1	CabinCategory_2	CabinCategory_3	CabinCategory_4	CabinCategory_5	CabinCategory_6	CabinCategory_7	CabinCategory_8	TicketPrefix_A	TicketPrefix_AQ	TicketPrefix_AS	TicketPrefix_C	TicketPrefix_CA	TicketPrefix_CASOTON	TicketPrefix_FC	TicketPrefix_FCC	TicketPrefix_Fa	TicketPrefix_LINE	TicketPrefix_LP	TicketPrefix_PC	TicketPrefix_PP	TicketPrefix_PPP	TicketPrefix_SC	TicketPrefix_SCA	TicketPrefix_SCAH	TicketPrefix_SCAH Basle	TicketPrefix_SCOW	TicketPrefix_SCPARIS	TicketPrefix_SCParis	TicketPrefix_SOC	TicketPrefix_SOP	TicketPrefix_SOPP	TicketPrefix_SOTONO	TicketPrefix_SOTONOQ	TicketPrefix_SP	TicketPrefix_STONO	TicketPrefix_STONOQ	TicketPrefix_SWPP	TicketPrefix_WC	TicketPrefix_WEP	TicketPrefix_unknown	Age_pred	AgeGroup	AgeGroup_pred	Frugal_First_Class_Single_Man	Poor_Old_Miss_Third_Class	Poor_Old_Miss_Second_Class
177	50.0	C49	C	28.7125	Isham, Miss. Ann Elizabeth	0	178	1	female	0	0.0	PC 17595	female_adult	0	1	0.0	0	0.0	0	-1	0	isham	1	0.0	0	0.0	0	-1	0	Miss	2	0	26	starboard	C	1	3	28.7125	-0.253930	-0.060862	PC	1.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	50.0	59	59	0	0	0
1003	36.0	A29	C	31.6792	Evans, Miss. Edith Corse	0	1004	1	female	0	NaN	PC 17531	female_adult	0	1	0.0	0	0.0	0	-1	0	evans	1	0.0	0	0.0	0	-1	0	Miss	2	0	24	starboard	A	1	1	31.6792	-0.231528	-0.024897	PC	1.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	36.0	49	49	0	0	0

	index	Age	Cabin	Embarked	Fare	Name	PassengerId	Pclass	Sex	Survived	Ticket	SexAdult	Embarked_Category	TicketMembers	TicketId	surname	SurnameMembers	SurnameId	SurnameMembers_Simple	Name_title	Name_titleCategory	NameLength	CabinLocation	CabinDeck	CabinCount	CabinCategory	Fare_per_ticket_member	Fare_standard_score_with_Pclass	Fare_per_ticket_member_standard_score_with_Pclass	TicketPrefix	Sex_male	Embarked_C	Embarked_S	...	CabinCategory_5	TicketPrefix_A	TicketPrefix_PC	TicketPrefix_unknown	Age_pred	AgeGroup	AgeGroup_pred	Frugal_First_Class_Single_Man	Survived_	pred_result
21	447	34.0		S	26.5500	Seward, Mr. Frederic Kimber	448	1	male	1.0	113794	male_adult	2	1	-1	seward	1	-1	0	Mr	1	27	no_cabin	no_cabin	0	0	26.5500	-0.271769	-0.089502	unknown	1.0	0.0	1.0	...	0.0	0.0	0.0	1.0	34.0	49	49	0	1.0	False
27	673	31.0		S	13.0000	Wilhelms, Mr. Charles	674	2	male	1.0	244270	male_adult	2	1	-1	wilhelms	1	-1	0	Mr	1	21	no_cabin	no_cabin	0	0	13.0000	-0.186791	0.134032	unknown	1.0	0.0	1.0	...	0.0	0.0	0.0	1.0	31.0	49	49	0	1.0	False
34	204	18.0		S	8.0500	Cohen, Mr. Gurshon "Gus"	205	3	male	1.0	A/5 3540	male_adult	2	1	-1	cohen	1	-1	0	Mr	1	24	no_cabin	no_cabin	0	0	8.0500	-0.205253	0.238394	A	1.0	0.0	1.0	...	0.0	1.0	0.0	0.0	18.0	18	18	0	1.0	False
36	23	28.0	A6	S	35.5000	Sloper, Mr. William Thompson	24	1	male	1.0	113788	male_adult	2	1	-1	sloper	1	-1	0	Mr	1	28	port	A	1	1	35.5000	-0.205583	0.016756	unknown	1.0	0.0	1.0	...	0.0	0.0	0.0	1.0	28.0	30	30	0	1.0	False
72	889	26.0	C148	C	30.0000	Behr, Mr. Karl Howell	890	1	male	1.0	111369	male_adult	0	1	-1	behr	1	-1	0	Mr	1	21	port	C	1	3	30.0000	-0.243936	-0.044818	unknown	1.0	1.0	0.0	...	0.0	0.0	0.0	1.0	26.0	30	30	1	1.0	False
97	338	45.0		S	8.0500	Dahl, Mr. Karl Edwart	339	3	male	1.0	7598	male_adult	2	1	-1	dahl	1	-1	0	Mr	1	21	no_cabin	no_cabin	0	0	8.0500	-0.205253	0.238394	unknown	1.0	0.0	1.0	...	0.0	0.0	0.0	1.0	45.0	49	49	0	1.0	False
108	286	30.0		S	9.5000	de Mulder, Mr. Theodore	287	3	male	1.0	345774	male_adult	2	1	-1	de mulder	1	-1	0	Mr	1	23	no_cabin	no_cabin	0	0	9.5000	-0.137514	0.659950	unknown	1.0	0.0	1.0	...	0.0	0.0	0.0	1.0	30.0	30	30	0	1.0	False
109	209	40.0	A31	C	31.0000	Blank, Mr. Henry	210	1	male	1.0	112277	male_adult	0	1	-1	blank	1	-1	0	Mr	1	16	starboard	A	1	1	31.0000	-0.236466	-0.032824	unknown	1.0	1.0	0.0	...	0.0	0.0	0.0	1.0	40.0	49	49	1	1.0	False
113	512	36.0	E25	S	26.2875	McGough, Mr. James Robert	513	1	male	1.0	PC 17473	male_adult	2	1	-1	mcgough	1	-1	0	Mr	1	25	starboard	E	1	5	26.2875	-0.274033	-0.093136	PC	1.0	0.0	1.0	...	1.0	0.0	1.0	0.0	36.0	49	49	0	1.0	False
125	604	35.0		C	26.5500	Homer, Mr. Harry ("Mr E Haven")	605	1	male	1.0	111426	male_adult	0	1	-1	homer	1	-1	0	Mr	1	31	no_cabin	no_cabin	0	0	26.5500	-0.271769	-0.089502	unknown	1.0	1.0	0.0	...	0.0	0.0	0.0	1.0	35.0	49	49	0	1.0	False
161	507	-300.0		S	26.5500	Bradley, Mr. George ("George Arthur Brayton")	508	1	male	1.0	111427	male_adult	2	1	-1	bradley	2	-1	1	Mr	1	45	no_cabin	no_cabin	0	0	26.5500	-0.271769	-0.089502	unknown	1.0	0.0	1.0	...	0.0	0.0	0.0	1.0	45.0	-1	49	0	1.0	False
172	572	36.0	E25	S	26.3875	Flynn, Mr. John Irwin ("Irving")	573	1	male	1.0	PC 17474	male_adult	2	1	-1	flynn	3	257	1	Mr	1	32	starboard	E	1	5	26.3875	-0.273168	-0.091747	PC	1.0	0.0	1.0	...	1.0	0.0	1.0	0.0	36.0	49	49	0	1.0	False

	index	Age	Cabin	Embarked	Fare	Name	PassengerId	Pclass	Sex	Ticket	SexAdult	Embarked_Category	TicketMembers	TicketId	TicketMembers_Simple	surname	SurnameMembers	SurnameId	Name_title	Name_titleCategory	NameLength	CabinLocation	CabinDeck	CabinCount	CabinCategory	Fare_per_ticket_member	Fare_standard_score_with_Pclass	Fare_per_ticket_member_standard_score_with_Pclass	TicketPrefix	Sex_female	Embarked_S	SexAdult_female_adult	...	CabinCategory_5	TicketPrefix_SOPP	TicketPrefix_WC	TicketPrefix_unknown	Age_pred	AgeGroup	AgeGroup_pred	Poor_Old_Miss_Second_Class	Prediction	pred_result
33	235	-300.0		S	7.55	Harknett, Miss. Alice Phoebe	236	3	female	W./C. 6609	female_adult	2	1	-1	0	harknett	1	-1	Miss	2	28	no_cabin	no_cabin	0	0	7.55	-0.231477	0.075199	WC	1.0	1.0	1.0	...	0.0	0.0	1.0	0.0	19.31	-1	30	0	1.0	False
65	772	57.0	E77	S	10.50	Mack, Mrs. (Mary)	773	2	female	S.O./P.P. 3	female_adult	2	2	-1	1	mack	1	-1	Mrs	3	17	starboard	E	1	5	5.25	-0.268509	-0.797750	SOPP	1.0	1.0	1.0	...	1.0	1.0	0.0	0.0	57.00	59	59	0	1.0	False
78	357	38.0		S	13.00	Funk, Miss. Annie Clemmer	358	2	female	237671	female_adult	2	1	-1	0	funk	1	-1	Miss	2	25	no_cabin	no_cabin	0	0	13.00	-0.186791	0.134032	unknown	1.0	1.0	1.0	...	0.0	0.0	0.0	1.0	38.00	49	49	1	1.0	False

	Age	Embarked	Fare	Name	Parch	PassengerId	Pclass	Sex	SibSp	Survived	Ticket	SexAdult	Embarked_Category	TicketMembers	Ticket_perishing_women	TicketGroup_include_perishing_women	TicketId	TicketMembers_Simple	surname	SurnameMembers	SurnameId	SurnameMembers_Simple	Name_title	Name_titleCategory	FamilySize	NameLength	CabinLocation	CabinDeck	Fare_per_ticket_member	Fare_standard_score_with_Pclass	Fare_per_ticket_member_standard_score_with_Pclass	TicketPrefix	Sex_female	Sex_male	Embarked_C	Embarked_S	SexAdult_female_adult	SexAdult_male_adult	...	TicketPrefix_unknown	Age_pred	AgeGroup	AgeGroup_pred
1088	18.0	S	7.7750	Nilsson, Miss. Berta Olivia	0	1089	3	female	0	NaN	347066	female_adult	2	1	0.0	0	-1	0	nilsson	3	577	1	Miss	2	0	27	no_cabin	no_cabin	7.775000	-0.219468	0.149933	unknown	1.0	0.0	0.0	1.0	1.0	0.0	...	1.0	18.000000	18	18
1258	22.0	S	39.6875	Riihivouri, Miss. Susanna Juhantytar Sanni""	0	1259	3	female	0	NaN	3101295	female_adult	2	7	1.0	1	335	2	riihivouri	1	-1	0	Miss	2	0	44	no_cabin	no_cabin	5.669643	0.447599	-0.653553	unknown	1.0	0.0	0.0	1.0	1.0	0.0	...	1.0	22.000000	30	30
1308	-300.0	C	22.3583	Peter, Master. Michael J	1	1309	3	male	1	NaN	2668	male_adult	0	3	0.0	0	262	1	peter	3	639	1	Master	4	2	24	no_cabin	no_cabin	7.452767	0.212708	0.042212	unknown	0.0	1.0	1.0	0.0	0.0	1.0	...	1.0	6.888267	-1	11