In [1]:

    
import pandas as pd
import numpy as np
import re
import itertools

1. Load train and test data



In [2]:

    
train = pd.read_csv("data/train.csv")
train["dataset"] = "train"
train.head()









    Out[2]:






  
    
      
      PassengerId
      Survived
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
      dataset
    
  
  
    
      0
      1
      0
      3
      Braund, Mr. Owen Harris
      male
      22
      1
      0
      A/5 21171
      7.2500
      NaN
      S
      train
    
    
      1
      2
      1
      1
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      female
      38
      1
      0
      PC 17599
      71.2833
      C85
      C
      train
    
    
      2
      3
      1
      3
      Heikkinen, Miss. Laina
      female
      26
      0
      0
      STON/O2. 3101282
      7.9250
      NaN
      S
      train
    
    
      3
      4
      1
      1
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      female
      35
      1
      0
      113803
      53.1000
      C123
      S
      train
    
    
      4
      5
      0
      3
      Allen, Mr. William Henry
      male
      35
      0
      0
      373450
      8.0500
      NaN
      S
      train



In [3]:

    
test = pd.read_csv("data/test.csv")
test["dataset"] = "test"
test.head()









    Out[3]:






  
    
      
      PassengerId
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
      dataset
    
  
  
    
      0
      892
      3
      Kelly, Mr. James
      male
      34.5
      0
      0
      330911
      7.8292
      NaN
      Q
      test
    
    
      1
      893
      3
      Wilkes, Mrs. James (Ellen Needs)
      female
      47.0
      1
      0
      363272
      7.0000
      NaN
      S
      test
    
    
      2
      894
      2
      Myles, Mr. Thomas Francis
      male
      62.0
      0
      0
      240276
      9.6875
      NaN
      Q
      test
    
    
      3
      895
      3
      Wirz, Mr. Albert
      male
      27.0
      0
      0
      315154
      8.6625
      NaN
      S
      test
    
    
      4
      896
      3
      Hirvonen, Mrs. Alexander (Helga E Lindqvist)
      female
      22.0
      1
      1
      3101298
      12.2875
      NaN
      S
      test



In [4]:

    
#Combine both datasets to predict families
train = train.append(test)
train.set_index(train["PassengerId"],inplace=True)

2. Tokenize name into (surname, title, first name and maiden name)



In [5]:

    
name_tokenizer = re.compile(r"^(?P<surname>[^,]+), (?P<title>[A-Z a-z]+?)\. (?P<f_name>[A-Z a-z.]+)?(?P<maiden_name>\([A-Za-z .]+\))?")



In [6]:

    
name_tokens = ["surname","title","f_name","maiden_name"]
for name_tk in name_tokens:
    train[name_tk] = train.Name.apply(lambda x: name_tokenizer.match(x).group(name_tk))
    test[name_tk] = test.Name.apply(lambda x: name_tokenizer.match(x).group(name_tk))
train.head(n=5)









    Out[6]:






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      dataset
      surname
      title
      f_name
      maiden_name
    
    
      PassengerId
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      22
      NaN
      S
      7.2500
      Braund, Mr. Owen Harris
      0
      1
      3
      male
      1
      0
      A/5 21171
      train
      Braund
      Mr
      Owen Harris
      None
    
    
      2
      38
      C85
      C
      71.2833
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      0
      2
      1
      female
      1
      1
      PC 17599
      train
      Cumings
      Mrs
      John Bradley
      (Florence Briggs Thayer)
    
    
      3
      26
      NaN
      S
      7.9250
      Heikkinen, Miss. Laina
      0
      3
      3
      female
      0
      1
      STON/O2. 3101282
      train
      Heikkinen
      Miss
      Laina
      None
    
    
      4
      35
      C123
      S
      53.1000
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      0
      4
      1
      female
      1
      1
      113803
      train
      Futrelle
      Mrs
      Jacques Heath
      (Lily May Peel)
    
    
      5
      35
      NaN
      S
      8.0500
      Allen, Mr. William Henry
      0
      5
      3
      male
      0
      0
      373450
      train
      Allen
      Mr
      William Henry
      None

2.1 Extract features from Title variable



In [7]:

    
print train.groupby(["title","Sex"]).size()









    



title         Sex   
Capt          male        1
Col           male        4
Don           male        1
Dona          female      1
Dr            female      1
              male        7
Jonkheer      male        1
Lady          female      1
Major         male        2
Master        male       61
Miss          female    260
Mlle          female      2
Mme           female      1
Mr            male      757
Mrs           female    197
Ms            female      2
Rev           male        8
Sir           male        1
the Countess  female      1
dtype: int64

It seems we can extract some info from title

Whether a woman is married Mme/Mrs vs Miss/Mlle vs Ms(Undetermined or single :/? )
Master title apparently given to male kids
Nobility vs laypeople : (Dr, Col, Capt, ...) vs (Mr,Master,Mrs,Miss). Ambiguous cases (Mlle,Mme,Ms,Don/Dona?)



In [8]:

    
#Encode special title following this logic
train.has_special_title = train.title.apply(lambda x: x not in ["Mr","Mrs","Miss","Mme","Mlle","Master"])

3 Examine marriages / sibling relationships



In [14]:

    
def is_married(couple_rows):
    are_married=False
    if couple_rows.irow(0).Sex != couple_rows.irow(1).Sex:
        #Get who is the husband and whose the wife
        man = couple_rows.irow(0) if couple_rows.irow(0).Sex == "male" else couple_rows.irow(1)
        woman = couple_rows.irow(0) if couple_rows.irow(0).Sex == "female" else couple_rows.irow(1)

        #Marriage tests
        marriage_tests = {}
        marriage_tests["same_f_name"] = woman.f_name is not None and woman.f_name in man.f_name
        marriage_tests["consistent_title"] =  woman.title not in ("Miss","Mlle") and man.title != "Master"
        marriage_tests["same_ticket"] = woman.Ticket == man.Ticket
        marriage_tests["same_pclass"] = woman.Pclass == man.Pclass
        marriage_tests["legal_age"] = (woman.title in ("Mme","Mrs") or woman.Age >= 10) and man.Age > 10
        marriage_tests["consistent_SibSp"] = (woman.SibSp > 0 and man.SibSp > 0) or (woman.SibSp == man.SibSp)

        are_married = marriage_tests["same_f_name"] and marriage_tests["legal_age"] or ( )
        
        consistency_checks = ( marriage_tests["consistent_title"] and 
                               marriage_tests["legal_age"] and 
                              marriage_tests["same_pclass"] and 
                              marriage_tests["same_ticket"] and
                              marriage_tests["consistent_SibSp"])

        if are_married and not consistency_checks:
            failed_tests = ", ".join("{}:{}".format(x,marriage_tests[x]) for x in marriage_tests if not marriage_tests[x])
            print "WARNING: Sketchy marriage: {}".format(failed_tests)
            print couple_rows
            
            print

    return are_married

Initialize data structures for algorithm



In [15]:

    
#Data structures - sets to keep track which ones have already been assigned
married_people = set()
people_with_parents = set()



In [16]:

    
links_to_assign = train[["SibSp","Parch"]]
#Matches a couple with the Max amount of kids they can have 
#Which is the min(husband.Parch, wife.Parch)
marriages_table = {}

1. Extract marriages in greedy fashion. Assume is_married has no fp ( might have actually :/ )



In [17]:

    
#Subset only people who have spouses/siblings on the boat
train_sibsp = train.ix[ train.SibSp > 0]
#People grouped by surname
surname_groups = train_sibsp.groupby("surname").groups



In [18]:

    
for surname in surname_groups:
    surname_rows = surname_groups[surname]
    couples = itertools.combinations(surname_rows,2)
    for cpl in couples:
        cpl_rows = train_sibsp.ix[list(cpl)]
        if is_married(cpl_rows):
            #Make sure we're not marrying somebody twice :p
            assert cpl[0] not in married_people,"{} is already married :/".format(cpl[0])
            assert cpl[1] not in married_people,"{} is already married :/".format(cpl[1])
            
            #add couples to married set
            married_people.add(cpl[0])
            married_people.add(cpl[1])
            
            marriages_table[cpl] = min(links_to_assign.ix[cpl[0]]["Parch"], links_to_assign.ix[cpl[1]]["Parch"] )
            
            
            #print
#    break



In [19]:

    
marriages_table









    Out[19]:





{(26, 1066): 5, (94, 924): 2, (152, 337): 0, (609, 686): 2, (737, 1059): 2}



In [ ]:

    
train.ix[list((26,1066))]



In [26]:

    
train.ix[ (train.SibSp > 0) | (train.Parch > 0) ].shape









    Out[26]:





(519, 17)



In [22]:

    
train









    Out[22]:






  
    
      
      Age
      Cabin
      Embarked
      Fare
      Name
      Parch
      PassengerId
      Pclass
      Sex
      SibSp
      Survived
      Ticket
      dataset
      surname
      title
      f_name
      maiden_name
    
    
      PassengerId
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      22.0
      NaN
      S
      7.2500
      Braund, Mr. Owen Harris
      0
      1
      3
      male
      1
      0
      A/5 21171
      train
      Braund
      Mr
      Owen Harris
      None
    
    
      2
      38.0
      C85
      C
      71.2833
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      0
      2
      1
      female
      1
      1
      PC 17599
      train
      Cumings
      Mrs
      John Bradley
      (Florence Briggs Thayer)
    
    
      3
      26.0
      NaN
      S
      7.9250
      Heikkinen, Miss. Laina
      0
      3
      3
      female
      0
      1
      STON/O2. 3101282
      train
      Heikkinen
      Miss
      Laina
      None
    
    
      4
      35.0
      C123
      S
      53.1000
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      0
      4
      1
      female
      1
      1
      113803
      train
      Futrelle
      Mrs
      Jacques Heath
      (Lily May Peel)
    
    
      5
      35.0
      NaN
      S
      8.0500
      Allen, Mr. William Henry
      0
      5
      3
      male
      0
      0
      373450
      train
      Allen
      Mr
      William Henry
      None
    
    
      6
      NaN
      NaN
      Q
      8.4583
      Moran, Mr. James
      0
      6
      3
      male
      0
      0
      330877
      train
      Moran
      Mr
      James
      None
    
    
      7
      54.0
      E46
      S
      51.8625
      McCarthy, Mr. Timothy J
      0
      7
      1
      male
      0
      0
      17463
      train
      McCarthy
      Mr
      Timothy J
      None
    
    
      8
      2.0
      NaN
      S
      21.0750
      Palsson, Master. Gosta Leonard
      1
      8
      3
      male
      3
      0
      349909
      train
      Palsson
      Master
      Gosta Leonard
      None
    
    
      9
      27.0
      NaN
      S
      11.1333
      Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
      2
      9
      3
      female
      0
      1
      347742
      train
      Johnson
      Mrs
      Oscar W
      (Elisabeth Vilhelmina Berg)
    
    
      10
      14.0
      NaN
      C
      30.0708
      Nasser, Mrs. Nicholas (Adele Achem)
      0
      10
      2
      female
      1
      1
      237736
      train
      Nasser
      Mrs
      Nicholas
      (Adele Achem)
    
    
      11
      4.0
      G6
      S
      16.7000
      Sandstrom, Miss. Marguerite Rut
      1
      11
      3
      female
      1
      1
      PP 9549
      train
      Sandstrom
      Miss
      Marguerite Rut
      None
    
    
      12
      58.0
      C103
      S
      26.5500
      Bonnell, Miss. Elizabeth
      0
      12
      1
      female
      0
      1
      113783
      train
      Bonnell
      Miss
      Elizabeth
      None
    
    
      13
      20.0
      NaN
      S
      8.0500
      Saundercock, Mr. William Henry
      0
      13
      3
      male
      0
      0
      A/5. 2151
      train
      Saundercock
      Mr
      William Henry
      None
    
    
      14
      39.0
      NaN
      S
      31.2750
      Andersson, Mr. Anders Johan
      5
      14
      3
      male
      1
      0
      347082
      train
      Andersson
      Mr
      Anders Johan
      None
    
    
      15
      14.0
      NaN
      S
      7.8542
      Vestrom, Miss. Hulda Amanda Adolfina
      0
      15
      3
      female
      0
      0
      350406
      train
      Vestrom
      Miss
      Hulda Amanda Adolfina
      None
    
    
      16
      55.0
      NaN
      S
      16.0000
      Hewlett, Mrs. (Mary D Kingcome)
      0
      16
      2
      female
      0
      1
      248706
      train
      Hewlett
      Mrs
      None
      (Mary D Kingcome)
    
    
      17
      2.0
      NaN
      Q
      29.1250
      Rice, Master. Eugene
      1
      17
      3
      male
      4
      0
      382652
      train
      Rice
      Master
      Eugene
      None
    
    
      18
      NaN
      NaN
      S
      13.0000
      Williams, Mr. Charles Eugene
      0
      18
      2
      male
      0
      1
      244373
      train
      Williams
      Mr
      Charles Eugene
      None
    
    
      19
      31.0
      NaN
      S
      18.0000
      Vander Planke, Mrs. Julius (Emelia Maria Vande...
      0
      19
      3
      female
      1
      0
      345763
      train
      Vander Planke
      Mrs
      Julius
      (Emelia Maria Vandemoortele)
    
    
      20
      NaN
      NaN
      C
      7.2250
      Masselmani, Mrs. Fatima
      0
      20
      3
      female
      0
      1
      2649
      train
      Masselmani
      Mrs
      Fatima
      None
    
    
      21
      35.0
      NaN
      S
      26.0000
      Fynney, Mr. Joseph J
      0
      21
      2
      male
      0
      0
      239865
      train
      Fynney
      Mr
      Joseph J
      None
    
    
      22
      34.0
      D56
      S
      13.0000
      Beesley, Mr. Lawrence
      0
      22
      2
      male
      0
      1
      248698
      train
      Beesley
      Mr
      Lawrence
      None
    
    
      23
      15.0
      NaN
      Q
      8.0292
      McGowan, Miss. Anna "Annie"
      0
      23
      3
      female
      0
      1
      330923
      train
      McGowan
      Miss
      Anna
      None
    
    
      24
      28.0
      A6
      S
      35.5000
      Sloper, Mr. William Thompson
      0
      24
      1
      male
      0
      1
      113788
      train
      Sloper
      Mr
      William Thompson
      None
    
    
      25
      8.0
      NaN
      S
      21.0750
      Palsson, Miss. Torborg Danira
      1
      25
      3
      female
      3
      0
      349909
      train
      Palsson
      Miss
      Torborg Danira
      None
    
    
      26
      38.0
      NaN
      S
      31.3875
      Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
      5
      26
      3
      female
      1
      1
      347077
      train
      Asplund
      Mrs
      Carl Oscar
      (Selma Augusta Emilia Johansson)
    
    
      27
      NaN
      NaN
      C
      7.2250
      Emir, Mr. Farred Chehab
      0
      27
      3
      male
      0
      0
      2631
      train
      Emir
      Mr
      Farred Chehab
      None
    
    
      28
      19.0
      C23 C25 C27
      S
      263.0000
      Fortune, Mr. Charles Alexander
      2
      28
      1
      male
      3
      0
      19950
      train
      Fortune
      Mr
      Charles Alexander
      None
    
    
      29
      NaN
      NaN
      Q
      7.8792
      O'Dwyer, Miss. Ellen "Nellie"
      0
      29
      3
      female
      0
      1
      330959
      train
      O'Dwyer
      Miss
      Ellen
      None
    
    
      30
      NaN
      NaN
      S
      7.8958
      Todoroff, Mr. Lalio
      0
      30
      3
      male
      0
      0
      349216
      train
      Todoroff
      Mr
      Lalio
      None
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      1280
      21.0
      NaN
      Q
      7.7500
      Canavan, Mr. Patrick
      0
      1280
      3
      male
      0
      NaN
      364858
      test
      Canavan
      Mr
      Patrick
      None
    
    
      1281
      6.0
      NaN
      S
      21.0750
      Palsson, Master. Paul Folke
      1
      1281
      3
      male
      3
      NaN
      349909
      test
      Palsson
      Master
      Paul Folke
      None
    
    
      1282
      23.0
      B24
      S
      93.5000
      Payne, Mr. Vivian Ponsonby
      0
      1282
      1
      male
      0
      NaN
      12749
      test
      Payne
      Mr
      Vivian Ponsonby
      None
    
    
      1283
      51.0
      D28
      S
      39.4000
      Lines, Mrs. Ernest H (Elizabeth Lindsey James)
      1
      1283
      1
      female
      0
      NaN
      PC 17592
      test
      Lines
      Mrs
      Ernest H
      (Elizabeth Lindsey James)
    
    
      1284
      13.0
      NaN
      S
      20.2500
      Abbott, Master. Eugene Joseph
      2
      1284
      3
      male
      0
      NaN
      C.A. 2673
      test
      Abbott
      Master
      Eugene Joseph
      None
    
    
      1285
      47.0
      NaN
      S
      10.5000
      Gilbert, Mr. William
      0
      1285
      2
      male
      0
      NaN
      C.A. 30769
      test
      Gilbert
      Mr
      William
      None
    
    
      1286
      29.0
      NaN
      S
      22.0250
      Kink-Heilmann, Mr. Anton
      1
      1286
      3
      male
      3
      NaN
      315153
      test
      Kink-Heilmann
      Mr
      Anton
      None
    
    
      1287
      18.0
      C31
      S
      60.0000
      Smith, Mrs. Lucien Philip (Mary Eloise Hughes)
      0
      1287
      1
      female
      1
      NaN
      13695
      test
      Smith
      Mrs
      Lucien Philip
      (Mary Eloise Hughes)
    
    
      1288
      24.0
      NaN
      Q
      7.2500
      Colbert, Mr. Patrick
      0
      1288
      3
      male
      0
      NaN
      371109
      test
      Colbert
      Mr
      Patrick
      None
    
    
      1289
      48.0
      B41
      C
      79.2000
      Frolicher-Stehli, Mrs. Maxmillian (Margaretha ...
      1
      1289
      1
      female
      1
      NaN
      13567
      test
      Frolicher-Stehli
      Mrs
      Maxmillian
      (Margaretha Emerentia Stehli)
    
    
      1290
      22.0
      NaN
      S
      7.7750
      Larsson-Rondberg, Mr. Edvard A
      0
      1290
      3
      male
      0
      NaN
      347065
      test
      Larsson-Rondberg
      Mr
      Edvard A
      None
    
    
      1291
      31.0
      NaN
      Q
      7.7333
      Conlon, Mr. Thomas Henry
      0
      1291
      3
      male
      0
      NaN
      21332
      test
      Conlon
      Mr
      Thomas Henry
      None
    
    
      1292
      30.0
      C7
      S
      164.8667
      Bonnell, Miss. Caroline
      0
      1292
      1
      female
      0
      NaN
      36928
      test
      Bonnell
      Miss
      Caroline
      None
    
    
      1293
      38.0
      NaN
      S
      21.0000
      Gale, Mr. Harry
      0
      1293
      2
      male
      1
      NaN
      28664
      test
      Gale
      Mr
      Harry
      None
    
    
      1294
      22.0
      NaN
      C
      59.4000
      Gibson, Miss. Dorothy Winifred
      1
      1294
      1
      female
      0
      NaN
      112378
      test
      Gibson
      Miss
      Dorothy Winifred
      None
    
    
      1295
      17.0
      NaN
      S
      47.1000
      Carrau, Mr. Jose Pedro
      0
      1295
      1
      male
      0
      NaN
      113059
      test
      Carrau
      Mr
      Jose Pedro
      None
    
    
      1296
      43.0
      D40
      C
      27.7208
      Frauenthal, Mr. Isaac Gerald
      0
      1296
      1
      male
      1
      NaN
      17765
      test
      Frauenthal
      Mr
      Isaac Gerald
      None
    
    
      1297
      20.0
      D38
      C
      13.8625
      Nourney, Mr. Alfred (Baron von Drachstedt")"
      0
      1297
      2
      male
      0
      NaN
      SC/PARIS 2166
      test
      Nourney
      Mr
      Alfred
      None
    
    
      1298
      23.0
      NaN
      S
      10.5000
      Ware, Mr. William Jeffery
      0
      1298
      2
      male
      1
      NaN
      28666
      test
      Ware
      Mr
      William Jeffery
      None
    
    
      1299
      50.0
      C80
      C
      211.5000
      Widener, Mr. George Dunton
      1
      1299
      1
      male
      1
      NaN
      113503
      test
      Widener
      Mr
      George Dunton
      None
    
    
      1300
      NaN
      NaN
      Q
      7.7208
      Riordan, Miss. Johanna Hannah""
      0
      1300
      3
      female
      0
      NaN
      334915
      test
      Riordan
      Miss
      Johanna Hannah
      None
    
    
      1301
      3.0
      NaN
      S
      13.7750
      Peacock, Miss. Treasteall
      1
      1301
      3
      female
      1
      NaN
      SOTON/O.Q. 3101315
      test
      Peacock
      Miss
      Treasteall
      None
    
    
      1302
      NaN
      NaN
      Q
      7.7500
      Naughton, Miss. Hannah
      0
      1302
      3
      female
      0
      NaN
      365237
      test
      Naughton
      Miss
      Hannah
      None
    
    
      1303
      37.0
      C78
      Q
      90.0000
      Minahan, Mrs. William Edward (Lillian E Thorpe)
      0
      1303
      1
      female
      1
      NaN
      19928
      test
      Minahan
      Mrs
      William Edward
      (Lillian E Thorpe)
    
    
      1304
      28.0
      NaN
      S
      7.7750
      Henriksson, Miss. Jenny Lovisa
      0
      1304
      3
      female
      0
      NaN
      347086
      test
      Henriksson
      Miss
      Jenny Lovisa
      None
    
    
      1305
      NaN
      NaN
      S
      8.0500
      Spector, Mr. Woolf
      0
      1305
      3
      male
      0
      NaN
      A.5. 3236
      test
      Spector
      Mr
      Woolf
      None
    
    
      1306
      39.0
      C105
      C
      108.9000
      Oliva y Ocana, Dona. Fermina
      0
      1306
      1
      female
      0
      NaN
      PC 17758
      test
      Oliva y Ocana
      Dona
      Fermina
      None
    
    
      1307
      38.5
      NaN
      S
      7.2500
      Saether, Mr. Simon Sivertsen
      0
      1307
      3
      male
      0
      NaN
      SOTON/O.Q. 3101262
      test
      Saether
      Mr
      Simon Sivertsen
      None
    
    
      1308
      NaN
      NaN
      S
      8.0500
      Ware, Mr. Frederick
      0
      1308
      3
      male
      0
      NaN
      359309
      test
      Ware
      Mr
      Frederick
      None
    
    
      1309
      NaN
      NaN
      C
      22.3583
      Peter, Master. Michael J
      1
      1309
      3
      male
      1
      NaN
      2668
      test
      Peter
      Master
      Michael J
      None
    
  

1309 rows × 17 columns



In [ ]:

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked	dataset
0	1	0	3	Braund, Mr. Owen Harris	male	22	1	A/5 21171	7.2500	NaN	S	train
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38	1	PC 17599	71.2833	C85	C	train
2	3	1	3	Heikkinen, Miss. Laina	female	26	0	STON/O2. 3101282	7.9250	NaN	S	train
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35	1	113803	53.1000	C123	S	train
4	5	0	3	Allen, Mr. William Henry	male	35	0	373450	8.0500	NaN	S	train

	PassengerId	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked	dataset
0	892	3	Kelly, Mr. James	male	34.5	0	0	330911	7.8292	NaN	Q	test
1	893	3	Wilkes, Mrs. James (Ellen Needs)	female	47.0	1	0	363272	7.0000	NaN	S	test
2	894	2	Myles, Mr. Thomas Francis	male	62.0	0	0	240276	9.6875	NaN	Q	test
3	895	3	Wirz, Mr. Albert	male	27.0	0	0	315154	8.6625	NaN	S	test
4	896	3	Hirvonen, Mrs. Alexander (Helga E Lindqvist)	female	22.0	1	1	3101298	12.2875	NaN	S	test

	Age	Cabin	Embarked	Fare	Name	Parch	PassengerId	Pclass	Sex	SibSp	Survived	Ticket	dataset	surname	title	f_name	maiden_name
PassengerId
1	22	NaN	S	7.2500	Braund, Mr. Owen Harris	0	1	3	male	1	0	A/5 21171	train	Braund	Mr	Owen Harris	None
2	38	C85	C	71.2833	Cumings, Mrs. John Bradley (Florence Briggs Th...	0	2	1	female	1	1	PC 17599	train	Cumings	Mrs	John Bradley	(Florence Briggs Thayer)
3	26	NaN	S	7.9250	Heikkinen, Miss. Laina	0	3	3	female	0	1	STON/O2. 3101282	train	Heikkinen	Miss	Laina	None
4	35	C123	S	53.1000	Futrelle, Mrs. Jacques Heath (Lily May Peel)	0	4	1	female	1	1	113803	train	Futrelle	Mrs	Jacques Heath	(Lily May Peel)
5	35	NaN	S	8.0500	Allen, Mr. William Henry	0	5	3	male	0	0	373450	train	Allen	Mr	William Henry	None