In [105]:
# coding=utf-8

import pandas as pd
import numpy as np

# Parse the labelled CSV once and slice it by row position instead of reading
# the same file three times. Each split is an independent copy so that later
# column assignments on one split neither touch the others nor trigger
# pandas chained-assignment warnings.
# NOTE(review): rows 0-539, 540-699 and the last 191 rows only partition the
# file without gaps or overlap if it has exactly 891 rows — confirm.
labelled_df = pd.read_csv('./data/train.csv', index_col=False)
train_df = labelled_df.head(540).copy()
valid_df = labelled_df[540:700].copy()
test_df = labelled_df.tail(191).copy()
# Rows to predict for; read from a separate file.
target_df = pd.read_csv('./data/test.csv', index_col=False)


def set_missing_ages(df, features, target):
    """Fill missing ``target`` values with random-forest predictions.

    A ``RandomForestRegressor`` is fitted on the rows where ``target`` is
    known, using the other ``features`` columns as predictors, and its
    predictions are written into the rows where ``target`` is null.

    Args:
        df: DataFrame holding ``target`` and every column in ``features``.
            It is modified in place.
        features: Column names used by the model. ``target`` may be listed
            (in any position); it is excluded from the predictors.
        target: Name of the column whose nulls are filled.

    Returns:
        The same ``df`` object. It is returned untouched when ``target``
        contains no nulls.
    """
    missing = df[target].isnull()
    if not missing.any():
        return df

    # Imported here so this function does not depend on a later notebook
    # cell's import having already been executed in the kernel.
    from sklearn.ensemble import RandomForestRegressor

    predictors = [column for column in features if column != target]
    # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
    # pandas 1.0.
    X_known = df.loc[~missing, predictors].values
    y_known = df.loc[~missing, target].values
    X_unknown = df.loc[missing, predictors].values

    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X_known, y_known)
    df.loc[missing, target] = rfr.predict(X_unknown)
    return df

def set_missing_ages_2(df, feature):
    """Fill missing ``Age`` values with the median age of rows sharing a ``Name``.

    Args:
        df: DataFrame with ``Age`` and ``Name`` columns. It is modified in
            place.
        feature: Not used by the function; kept so existing calls keep
            working.

    Returns:
        The same ``df`` object. A row keeps a null ``Age`` when no row with
        the same ``Name`` has a known age.
    """
    # NOTE(review): grouping on ``Name`` only fills anything when several rows
    # share the same value (e.g. the column was reduced to a title
    # beforehand) — confirm against the caller.
    # A grouped median replaces the former -1 sentinel and row-by-row loop;
    # assigning the column back (rather than a chained ``inplace`` fillna)
    # also works when pandas copy-on-write is enabled.
    group_median = df.groupby('Name')['Age'].transform('median')
    df['Age'] = df['Age'].fillna(group_median)
    return df
# Name Ticket Cabin

def title_keymap_generate(target):
    """Compute the survival rate for each title found in ``target.Name``.

    The title is the run of ASCII letters that follows a space and is
    terminated by a period (first match per name).

    Args:
        target: DataFrame with a string ``Name`` column and a ``Survived``
            column in which 1 marks a survivor.

    Returns:
        A float Series indexed by the distinct extracted titles, in order of
        first appearance. Each value is the number of rows with
        ``Survived == 1`` divided by the number of rows with a non-null
        ``Survived`` for that title; titles with no such rows map to 0.0.
    """
    # Raw string: ``\.`` inside a normal literal is an invalid escape sequence.
    titles = target.Name.str.extract(r' ([A-Za-z]+)\.', expand=False)
    km = titles.unique()
    survived_rate = pd.Series(0.0, index=km)
    for title in km:
        outcomes = target.Survived[titles == title].dropna()
        # A null title (name without a match) selects no rows and simply
        # keeps the 0.0 default.
        if len(outcomes):
            survived_rate[title] = float((outcomes == 1).sum()) / len(outcomes)
    return survived_rate

def sibsp_map_generate(source):
    """Compute the survival rate for each distinct ``SibSp`` value.

    Args:
        source: DataFrame with ``SibSp`` and ``Survived`` columns, where a
            ``Survived`` value of 1 marks a survivor.

    Returns:
        A float Series indexed by the distinct ``SibSp`` values in order of
        first appearance. Each value is the count of ``Survived == 1`` rows
        divided by the count of all counted ``Survived`` rows for that
        ``SibSp``; it is 0 when no row of the group has ``Survived == 1``.
    """
    sibsp_values = source['SibSp'].unique()

    def _survival_share(sibsp):
        # Frequency table of Survived within this SibSp group.
        counts = source.Survived[source['SibSp'] == sibsp].value_counts()
        if 1 not in counts:
            return 0.0
        return float(counts[1]) / float(sum(counts))

    shares = [_survival_share(value) for value in sibsp_values]
    return pd.Series(shares, index=sibsp_values, dtype=float)

# Lookup tables (survival rate per title and per SibSp value), computed from
# the training split before any of its columns are transformed.
survived_rate = title_keymap_generate(train_df)
sib_rate = sibsp_map_generate(train_df)

# NOTE: ``source`` is the same object as ``train_df`` until the concat below,
# so the Cabin and Fare assignments also change ``train_df``.
source = train_df
# Keep the capital letter that precedes the digits of the cabin code; values
# without such a match (including missing cabins) become 'NULL'.
# Columns are assigned back instead of using a chained ``inplace=True``
# fillna, which is a no-op when pandas copy-on-write is enabled.
source['Cabin'] = source['Cabin'].str.extract(r'([A-Z])\d+', expand=False).fillna('NULL')

# Missing fares take the median of the known fares (median skips nulls).
source['Fare'] = source['Fare'].fillna(source['Fare'].median())

# One-hot encode the categorical columns and append them to the frame.
dummies_embarked = pd.get_dummies(source['Embarked'], prefix='Embarked')
dummies_cabin = pd.get_dummies(source['Cabin'], prefix='Cabin')
dummies_Pclass = pd.get_dummies(source['Pclass'], prefix='Pclass')
source = pd.concat([source, dummies_embarked, dummies_Pclass, dummies_cabin], axis=1)

# Replace the title in each name by its survival rate; titles missing from
# the lookup (or names without a title) fall back to 0.5.
source['Title'] = (
    source['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    .map(survived_rate)
    .fillna(0.5)
)

# 1 for 'male', 0 for anything else.
source['Sex'] = (source['Sex'] == 'male').astype('int64')
# NOTE(review): isChild is derived before the missing ages are imputed
# below, so rows with an unknown age always get 0 — confirm this is intended.
source['isChild'] = (source['Age'] <= 16).astype('int64')
source['isAlone'] = 0
# FamilySize must use the raw SibSp counts, so it is computed before SibSp is
# replaced by its survival rate.
source['FamilySize'] = source['SibSp'] + source['Parch'] + 1

# SibSp values missing from the lookup fall back to 0.5.
source['SibSp'] = source['SibSp'].map(sib_rate).fillna(0.5)

source.loc[source['FamilySize'] == 1, 'isAlone'] = 1
source = set_missing_ages(source, ['Age', 'Fare', 'Parch', 'SibSp', 'Pclass'], 'Age')
# Keep only the columns whose names match one of the feature patterns.
source = source.filter(regex='isChild|isAlone|Title|Age|SibSp|Parch|Fare|Embarked_.*|Cabin_.*|Sex|Pclass_.*')

source.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 22 columns):
Sex           540 non-null int64
Age           540 non-null float64
SibSp         540 non-null float64
Parch         540 non-null int64
Fare          540 non-null float64
Embarked_C    540 non-null uint8
Embarked_Q    540 non-null uint8
Embarked_S    540 non-null uint8
Pclass_1      540 non-null uint8
Pclass_2      540 non-null uint8
Pclass_3      540 non-null uint8
Cabin_A       540 non-null uint8
Cabin_B       540 non-null uint8
Cabin_C       540 non-null uint8
Cabin_D       540 non-null uint8
Cabin_E       540 non-null uint8
Cabin_F       540 non-null uint8
Cabin_G       540 non-null uint8
Cabin_NULL    540 non-null uint8
Title         540 non-null float64
isChild       540 non-null int64
isAlone       540 non-null int64
dtypes: float64(4), int64(4), uint8(14)
memory usage: 41.2 KB

In [91]:
# ``fillna`` returns a new Series; without assigning it back the call has no
# effect on ``source``. Missing fares take the median of the known fares.
source['Fare'] = source['Fare'].fillna(source['Fare'].median())
source


Out[91]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S
... ... ... ... ... ... ... ... ... ... ... ... ...
570 571 1 2 Harris, Mr. George male 62.0 0 0 S.W./PP 752 10.5000 NaN S
571 572 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) female 53.0 2 0 11769 51.4792 C101 S
572 573 1 1 Flynn, Mr. John Irwin ("Irving") male 36.0 0 0 PC 17474 26.3875 E25 S
573 574 1 3 Kelly, Miss. Mary female NaN 0 0 14312 7.7500 NaN Q
574 575 0 3 Rush, Mr. Alfred George John male 16.0 0 0 A/4. 20589 8.0500 NaN S
575 576 0 3 Patchett, Mr. George male 19.0 0 0 358585 14.5000 NaN S
576 577 1 2 Garside, Miss. Ethel female 34.0 0 0 243880 13.0000 NaN S
577 578 1 1 Silvey, Mrs. William Baird (Alice Munger) female 39.0 1 0 13507 55.9000 E44 S
578 579 0 3 Caram, Mrs. Joseph (Maria Elias) female NaN 1 0 2689 14.4583 NaN C
579 580 1 3 Jussila, Mr. Eiriik male 32.0 0 0 STON/O 2. 3101286 7.9250 NaN S
580 581 1 2 Christy, Miss. Julie Rachel female 25.0 1 1 237789 30.0000 NaN S
581 582 1 1 Thayer, Mrs. John Borland (Marian Longstreth M... female 39.0 1 1 17421 110.8833 C68 C
582 583 0 2 Downton, Mr. William James male 54.0 0 0 28403 26.0000 NaN S
583 584 0 1 Ross, Mr. John Hugo male 36.0 0 0 13049 40.1250 A10 C
584 585 0 3 Paulner, Mr. Uscher male NaN 0 0 3411 8.7125 NaN C
585 586 1 1 Taussig, Miss. Ruth female 18.0 0 2 110413 79.6500 E68 S
586 587 0 2 Jarvis, Mr. John Denzil male 47.0 0 0 237565 15.0000 NaN S
587 588 1 1 Frolicher-Stehli, Mr. Maxmillian male 60.0 1 1 13567 79.2000 B41 C
588 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 NaN S
589 590 0 3 Murdlin, Mr. Joseph male NaN 0 0 A./5. 3235 8.0500 NaN S
590 591 0 3 Rintamaki, Mr. Matti male 35.0 0 0 STON/O 2. 3101273 7.1250 NaN S
591 592 1 1 Stephenson, Mrs. Walter Bertram (Martha Eustis) female 52.0 1 0 36947 78.2667 D20 C
592 593 0 3 Elsbury, Mr. William James male 47.0 0 0 A/5 3902 7.2500 NaN S
593 594 0 3 Bourke, Miss. Mary female NaN 0 2 364848 7.7500 NaN Q
594 595 0 2 Chapman, Mr. John Henry male 37.0 1 0 SC/AH 29037 26.0000 NaN S
595 596 0 3 Van Impe, Mr. Jean Baptiste male 36.0 1 1 345773 24.1500 NaN S
596 597 1 2 Leitch, Miss. Jessie Wills female NaN 0 0 248727 33.0000 NaN S
597 598 0 3 Johnson, Mr. Alfred male 49.0 0 0 LINE 0.0000 NaN S
598 599 0 3 Boulos, Mr. Hanna male NaN 0 0 2664 7.2250 NaN C
599 600 1 1 Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan") male 49.0 1 0 PC 17485 56.9292 A20 C

600 rows × 12 columns


In [92]:
# One-hot encode Embarked and Pclass (Sex is not one-hot encoded here) and
# append the indicator columns to the right of the existing ones.
dummies_embarked, dummies_Pclass = (
    pd.get_dummies(source[column], prefix=column)
    for column in ('Embarked', 'Pclass')
)
source = pd.concat([source, dummies_embarked, dummies_Pclass], axis=1)
source


Out[92]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 0 0 1 0 0 1
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C 1 0 0 1 0 0
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 0 0 1 0 0 1
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 0 0 1 1 0 0
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 0 0 1 0 0 1
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q 0 1 0 0 0 1
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S 0 0 1 1 0 0
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S 0 0 1 0 0 1
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S 0 0 1 0 0 1
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C 1 0 0 0 1 0
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S 0 0 1 0 0 1
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S 0 0 1 1 0 0
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S 0 0 1 0 0 1
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S 0 0 1 0 0 1
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S 0 0 1 0 0 1
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S 0 0 1 0 1 0
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q 0 1 0 0 0 1
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S 0 0 1 0 1 0
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S 0 0 1 0 0 1
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C 1 0 0 0 0 1
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S 0 0 1 0 1 0
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S 0 0 1 0 1 0
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q 0 1 0 0 0 1
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S 0 0 1 1 0 0
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S 0 0 1 0 0 1
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S 0 0 1 0 0 1
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C 1 0 0 0 0 1
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S 0 0 1 1 0 0
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q 0 1 0 0 0 1
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S 0 0 1 0 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
570 571 1 2 Harris, Mr. George male 62.0 0 0 S.W./PP 752 10.5000 NaN S 0 0 1 0 1 0
571 572 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) female 53.0 2 0 11769 51.4792 C101 S 0 0 1 1 0 0
572 573 1 1 Flynn, Mr. John Irwin ("Irving") male 36.0 0 0 PC 17474 26.3875 E25 S 0 0 1 1 0 0
573 574 1 3 Kelly, Miss. Mary female NaN 0 0 14312 7.7500 NaN Q 0 1 0 0 0 1
574 575 0 3 Rush, Mr. Alfred George John male 16.0 0 0 A/4. 20589 8.0500 NaN S 0 0 1 0 0 1
575 576 0 3 Patchett, Mr. George male 19.0 0 0 358585 14.5000 NaN S 0 0 1 0 0 1
576 577 1 2 Garside, Miss. Ethel female 34.0 0 0 243880 13.0000 NaN S 0 0 1 0 1 0
577 578 1 1 Silvey, Mrs. William Baird (Alice Munger) female 39.0 1 0 13507 55.9000 E44 S 0 0 1 1 0 0
578 579 0 3 Caram, Mrs. Joseph (Maria Elias) female NaN 1 0 2689 14.4583 NaN C 1 0 0 0 0 1
579 580 1 3 Jussila, Mr. Eiriik male 32.0 0 0 STON/O 2. 3101286 7.9250 NaN S 0 0 1 0 0 1
580 581 1 2 Christy, Miss. Julie Rachel female 25.0 1 1 237789 30.0000 NaN S 0 0 1 0 1 0
581 582 1 1 Thayer, Mrs. John Borland (Marian Longstreth M... female 39.0 1 1 17421 110.8833 C68 C 1 0 0 1 0 0
582 583 0 2 Downton, Mr. William James male 54.0 0 0 28403 26.0000 NaN S 0 0 1 0 1 0
583 584 0 1 Ross, Mr. John Hugo male 36.0 0 0 13049 40.1250 A10 C 1 0 0 1 0 0
584 585 0 3 Paulner, Mr. Uscher male NaN 0 0 3411 8.7125 NaN C 1 0 0 0 0 1
585 586 1 1 Taussig, Miss. Ruth female 18.0 0 2 110413 79.6500 E68 S 0 0 1 1 0 0
586 587 0 2 Jarvis, Mr. John Denzil male 47.0 0 0 237565 15.0000 NaN S 0 0 1 0 1 0
587 588 1 1 Frolicher-Stehli, Mr. Maxmillian male 60.0 1 1 13567 79.2000 B41 C 1 0 0 1 0 0
588 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 NaN S 0 0 1 0 0 1
589 590 0 3 Murdlin, Mr. Joseph male NaN 0 0 A./5. 3235 8.0500 NaN S 0 0 1 0 0 1
590 591 0 3 Rintamaki, Mr. Matti male 35.0 0 0 STON/O 2. 3101273 7.1250 NaN S 0 0 1 0 0 1
591 592 1 1 Stephenson, Mrs. Walter Bertram (Martha Eustis) female 52.0 1 0 36947 78.2667 D20 C 1 0 0 1 0 0
592 593 0 3 Elsbury, Mr. William James male 47.0 0 0 A/5 3902 7.2500 NaN S 0 0 1 0 0 1
593 594 0 3 Bourke, Miss. Mary female NaN 0 2 364848 7.7500 NaN Q 0 1 0 0 0 1
594 595 0 2 Chapman, Mr. John Henry male 37.0 1 0 SC/AH 29037 26.0000 NaN S 0 0 1 0 1 0
595 596 0 3 Van Impe, Mr. Jean Baptiste male 36.0 1 1 345773 24.1500 NaN S 0 0 1 0 0 1
596 597 1 2 Leitch, Miss. Jessie Wills female NaN 0 0 248727 33.0000 NaN S 0 0 1 0 1 0
597 598 0 3 Johnson, Mr. Alfred male 49.0 0 0 LINE 0.0000 NaN S 0 0 1 0 0 1
598 599 0 3 Boulos, Mr. Hanna male NaN 0 0 2664 7.2250 NaN C 1 0 0 0 0 1
599 600 1 1 Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan") male 49.0 1 0 PC 17485 56.9292 A20 C 1 0 0 1 0 0

600 rows × 18 columns


In [93]:
# Title = the run of letters that follows a space and ends with a period.
# Raw string: ``\.`` inside a normal literal is an invalid escape sequence.
source['Title'] = source.Name.str.extract(r' ([A-Za-z]+)\.', expand=False)
source['Title']


Out[93]:
0          Mr
1         Mrs
2        Miss
3         Mrs
4          Mr
5          Mr
6          Mr
7      Master
8         Mrs
9         Mrs
10       Miss
11       Miss
12         Mr
13         Mr
14       Miss
15        Mrs
16     Master
17         Mr
18        Mrs
19        Mrs
20         Mr
21         Mr
22       Miss
23         Mr
24       Miss
25        Mrs
26         Mr
27         Mr
28       Miss
29         Mr
        ...  
570        Mr
571       Mrs
572        Mr
573      Miss
574        Mr
575        Mr
576      Miss
577       Mrs
578       Mrs
579        Mr
580      Miss
581       Mrs
582        Mr
583        Mr
584        Mr
585      Miss
586        Mr
587        Mr
588        Mr
589        Mr
590        Mr
591       Mrs
592        Mr
593      Miss
594        Mr
595        Mr
596      Miss
597        Mr
598        Mr
599       Sir
Name: Title, Length: 600, dtype: object

In [1]:
# Distinct values of the Title column, in order of first appearance.
source['Title'].unique()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-e6ed9908333a> in <module>()
----> 1 source.Title.unique()
      2 source['agepower'] = source['Age'] ^ 2

NameError: name 'source' is not defined

In [95]:
import matplotlib.pyplot as plt
%matplotlib inline
# Creates an empty figure; nothing in this cell draws on it.
fig = plt.figure(figsize=(15, 6))

# Replace each title string in ``source`` by the share of survivors among the
# rows carrying that title.
km = source['Title'].unique()
survived_rate = pd.Series(0.0, index=km)
for title in km:
    title_df = source.Survived[source['Title'] == title]
    survived_total = title_df.value_counts()
    # Titles without any Survived == 1 row get a rate of 0.
    survived_rate[title] = (
        float(survived_total[1]) / float(sum(survived_total))
        if 1 in survived_total
        else 0
    )
source['Title'] = source['Title'].map(survived_rate)
source


Out[95]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Title
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 0 0 1 0 0 1 0.155882
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C 1 0 0 1 0 0 0.811765
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 0 0 1 0 0 1 0.703704
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 0 0 1 1 0 0 0.811765
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 0 0 1 0 0 1 0.155882
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q 0 1 0 0 0 1 0.155882
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S 0 0 1 1 0 0 0.155882
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S 0 0 1 0 0 1 0.518519
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S 0 0 1 0 0 1 0.811765
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C 1 0 0 0 1 0 0.811765
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S 0 0 1 0 0 1 0.703704
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S 0 0 1 1 0 0 0.703704
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S 0 0 1 0 0 1 0.155882
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S 0 0 1 0 0 1 0.155882
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S 0 0 1 0 0 1 0.703704
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S 0 0 1 0 1 0 0.811765
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q 0 1 0 0 0 1 0.518519
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S 0 0 1 0 1 0 0.155882
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S 0 0 1 0 0 1 0.811765
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C 1 0 0 0 0 1 0.811765
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S 0 0 1 0 1 0 0.155882
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S 0 0 1 0 1 0 0.155882
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q 0 1 0 0 0 1 0.703704
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S 0 0 1 1 0 0 0.155882
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S 0 0 1 0 0 1 0.703704
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S 0 0 1 0 0 1 0.811765
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C 1 0 0 0 0 1 0.155882
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S 0 0 1 1 0 0 0.155882
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q 0 1 0 0 0 1 0.703704
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S 0 0 1 0 0 1 0.155882
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
570 571 1 2 Harris, Mr. George male 62.0 0 0 S.W./PP 752 10.5000 NaN S 0 0 1 0 1 0 0.155882
571 572 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) female 53.0 2 0 11769 51.4792 C101 S 0 0 1 1 0 0 0.811765
572 573 1 1 Flynn, Mr. John Irwin ("Irving") male 36.0 0 0 PC 17474 26.3875 E25 S 0 0 1 1 0 0 0.155882
573 574 1 3 Kelly, Miss. Mary female NaN 0 0 14312 7.7500 NaN Q 0 1 0 0 0 1 0.703704
574 575 0 3 Rush, Mr. Alfred George John male 16.0 0 0 A/4. 20589 8.0500 NaN S 0 0 1 0 0 1 0.155882
575 576 0 3 Patchett, Mr. George male 19.0 0 0 358585 14.5000 NaN S 0 0 1 0 0 1 0.155882
576 577 1 2 Garside, Miss. Ethel female 34.0 0 0 243880 13.0000 NaN S 0 0 1 0 1 0 0.703704
577 578 1 1 Silvey, Mrs. William Baird (Alice Munger) female 39.0 1 0 13507 55.9000 E44 S 0 0 1 1 0 0 0.811765
578 579 0 3 Caram, Mrs. Joseph (Maria Elias) female NaN 1 0 2689 14.4583 NaN C 1 0 0 0 0 1 0.811765
579 580 1 3 Jussila, Mr. Eiriik male 32.0 0 0 STON/O 2. 3101286 7.9250 NaN S 0 0 1 0 0 1 0.155882
580 581 1 2 Christy, Miss. Julie Rachel female 25.0 1 1 237789 30.0000 NaN S 0 0 1 0 1 0 0.703704
581 582 1 1 Thayer, Mrs. John Borland (Marian Longstreth M... female 39.0 1 1 17421 110.8833 C68 C 1 0 0 1 0 0 0.811765
582 583 0 2 Downton, Mr. William James male 54.0 0 0 28403 26.0000 NaN S 0 0 1 0 1 0 0.155882
583 584 0 1 Ross, Mr. John Hugo male 36.0 0 0 13049 40.1250 A10 C 1 0 0 1 0 0 0.155882
584 585 0 3 Paulner, Mr. Uscher male NaN 0 0 3411 8.7125 NaN C 1 0 0 0 0 1 0.155882
585 586 1 1 Taussig, Miss. Ruth female 18.0 0 2 110413 79.6500 E68 S 0 0 1 1 0 0 0.703704
586 587 0 2 Jarvis, Mr. John Denzil male 47.0 0 0 237565 15.0000 NaN S 0 0 1 0 1 0 0.155882
587 588 1 1 Frolicher-Stehli, Mr. Maxmillian male 60.0 1 1 13567 79.2000 B41 C 1 0 0 1 0 0 0.155882
588 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 NaN S 0 0 1 0 0 1 0.155882
589 590 0 3 Murdlin, Mr. Joseph male NaN 0 0 A./5. 3235 8.0500 NaN S 0 0 1 0 0 1 0.155882
590 591 0 3 Rintamaki, Mr. Matti male 35.0 0 0 STON/O 2. 3101273 7.1250 NaN S 0 0 1 0 0 1 0.155882
591 592 1 1 Stephenson, Mrs. Walter Bertram (Martha Eustis) female 52.0 1 0 36947 78.2667 D20 C 1 0 0 1 0 0 0.811765
592 593 0 3 Elsbury, Mr. William James male 47.0 0 0 A/5 3902 7.2500 NaN S 0 0 1 0 0 1 0.155882
593 594 0 3 Bourke, Miss. Mary female NaN 0 2 364848 7.7500 NaN Q 0 1 0 0 0 1 0.703704
594 595 0 2 Chapman, Mr. John Henry male 37.0 1 0 SC/AH 29037 26.0000 NaN S 0 0 1 0 1 0 0.155882
595 596 0 3 Van Impe, Mr. Jean Baptiste male 36.0 1 1 345773 24.1500 NaN S 0 0 1 0 0 1 0.155882
596 597 1 2 Leitch, Miss. Jessie Wills female NaN 0 0 248727 33.0000 NaN S 0 0 1 0 1 0 0.703704
597 598 0 3 Johnson, Mr. Alfred male 49.0 0 0 LINE 0.0000 NaN S 0 0 1 0 0 1 0.155882
598 599 0 3 Boulos, Mr. Hanna male NaN 0 0 2664 7.2250 NaN C 1 0 0 0 0 1 0.155882
599 600 1 1 Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan") male 49.0 1 0 PC 17485 56.9292 A20 C 1 0 0 1 0 0 1.000000

600 rows × 19 columns

<matplotlib.figure.Figure at 0x10bcbb860>

In [96]:
# Flag passengers aged 16 or younger; a missing age compares as False and
# therefore yields 0.
source['isChild'] = source['Age'].le(16).astype('int64')
source


Out[96]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Title isChild
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 0 0 1 0 0 1 0.155882 0
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C 1 0 0 1 0 0 0.811765 0
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 0 0 1 0 0 1 0.703704 0
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 0 0 1 1 0 0 0.811765 0
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 0 0 1 0 0 1 0.155882 0
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q 0 1 0 0 0 1 0.155882 0
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S 0 0 1 1 0 0 0.155882 0
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S 0 0 1 0 0 1 0.518519 1
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S 0 0 1 0 0 1 0.811765 0
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C 1 0 0 0 1 0 0.811765 1
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S 0 0 1 0 0 1 0.703704 1
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S 0 0 1 1 0 0 0.703704 0
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S 0 0 1 0 0 1 0.155882 0
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S 0 0 1 0 0 1 0.155882 0
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S 0 0 1 0 0 1 0.703704 1
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S 0 0 1 0 1 0 0.811765 0
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q 0 1 0 0 0 1 0.518519 1
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S 0 0 1 0 1 0 0.155882 0
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S 0 0 1 0 0 1 0.811765 0
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C 1 0 0 0 0 1 0.811765 0
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S 0 0 1 0 1 0 0.155882 0
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S 0 0 1 0 1 0 0.155882 0
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q 0 1 0 0 0 1 0.703704 1
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S 0 0 1 1 0 0 0.155882 0
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S 0 0 1 0 0 1 0.703704 1
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S 0 0 1 0 0 1 0.811765 0
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C 1 0 0 0 0 1 0.155882 0
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S 0 0 1 1 0 0 0.155882 0
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q 0 1 0 0 0 1 0.703704 0
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S 0 0 1 0 0 1 0.155882 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
570 571 1 2 Harris, Mr. George male 62.0 0 0 S.W./PP 752 10.5000 NaN S 0 0 1 0 1 0 0.155882 0
571 572 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) female 53.0 2 0 11769 51.4792 C101 S 0 0 1 1 0 0 0.811765 0
572 573 1 1 Flynn, Mr. John Irwin ("Irving") male 36.0 0 0 PC 17474 26.3875 E25 S 0 0 1 1 0 0 0.155882 0
573 574 1 3 Kelly, Miss. Mary female NaN 0 0 14312 7.7500 NaN Q 0 1 0 0 0 1 0.703704 0
574 575 0 3 Rush, Mr. Alfred George John male 16.0 0 0 A/4. 20589 8.0500 NaN S 0 0 1 0 0 1 0.155882 1
575 576 0 3 Patchett, Mr. George male 19.0 0 0 358585 14.5000 NaN S 0 0 1 0 0 1 0.155882 0
576 577 1 2 Garside, Miss. Ethel female 34.0 0 0 243880 13.0000 NaN S 0 0 1 0 1 0 0.703704 0
577 578 1 1 Silvey, Mrs. William Baird (Alice Munger) female 39.0 1 0 13507 55.9000 E44 S 0 0 1 1 0 0 0.811765 0
578 579 0 3 Caram, Mrs. Joseph (Maria Elias) female NaN 1 0 2689 14.4583 NaN C 1 0 0 0 0 1 0.811765 0
579 580 1 3 Jussila, Mr. Eiriik male 32.0 0 0 STON/O 2. 3101286 7.9250 NaN S 0 0 1 0 0 1 0.155882 0
580 581 1 2 Christy, Miss. Julie Rachel female 25.0 1 1 237789 30.0000 NaN S 0 0 1 0 1 0 0.703704 0
581 582 1 1 Thayer, Mrs. John Borland (Marian Longstreth M... female 39.0 1 1 17421 110.8833 C68 C 1 0 0 1 0 0 0.811765 0
582 583 0 2 Downton, Mr. William James male 54.0 0 0 28403 26.0000 NaN S 0 0 1 0 1 0 0.155882 0
583 584 0 1 Ross, Mr. John Hugo male 36.0 0 0 13049 40.1250 A10 C 1 0 0 1 0 0 0.155882 0
584 585 0 3 Paulner, Mr. Uscher male NaN 0 0 3411 8.7125 NaN C 1 0 0 0 0 1 0.155882 0
585 586 1 1 Taussig, Miss. Ruth female 18.0 0 2 110413 79.6500 E68 S 0 0 1 1 0 0 0.703704 0
586 587 0 2 Jarvis, Mr. John Denzil male 47.0 0 0 237565 15.0000 NaN S 0 0 1 0 1 0 0.155882 0
587 588 1 1 Frolicher-Stehli, Mr. Maxmillian male 60.0 1 1 13567 79.2000 B41 C 1 0 0 1 0 0 0.155882 0
588 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 NaN S 0 0 1 0 0 1 0.155882 0
589 590 0 3 Murdlin, Mr. Joseph male NaN 0 0 A./5. 3235 8.0500 NaN S 0 0 1 0 0 1 0.155882 0
590 591 0 3 Rintamaki, Mr. Matti male 35.0 0 0 STON/O 2. 3101273 7.1250 NaN S 0 0 1 0 0 1 0.155882 0
591 592 1 1 Stephenson, Mrs. Walter Bertram (Martha Eustis) female 52.0 1 0 36947 78.2667 D20 C 1 0 0 1 0 0 0.811765 0
592 593 0 3 Elsbury, Mr. William James male 47.0 0 0 A/5 3902 7.2500 NaN S 0 0 1 0 0 1 0.155882 0
593 594 0 3 Bourke, Miss. Mary female NaN 0 2 364848 7.7500 NaN Q 0 1 0 0 0 1 0.703704 0
594 595 0 2 Chapman, Mr. John Henry male 37.0 1 0 SC/AH 29037 26.0000 NaN S 0 0 1 0 1 0 0.155882 0
595 596 0 3 Van Impe, Mr. Jean Baptiste male 36.0 1 1 345773 24.1500 NaN S 0 0 1 0 0 1 0.155882 0
596 597 1 2 Leitch, Miss. Jessie Wills female NaN 0 0 248727 33.0000 NaN S 0 0 1 0 1 0 0.703704 0
597 598 0 3 Johnson, Mr. Alfred male 49.0 0 0 LINE 0.0000 NaN S 0 0 1 0 0 1 0.155882 0
598 599 0 3 Boulos, Mr. Hanna male NaN 0 0 2664 7.2250 NaN C 1 0 0 0 0 1 0.155882 0
599 600 1 1 Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan") male 49.0 1 0 PC 17485 56.9292 A20 C 1 0 0 1 0 0 1.000000 0

600 rows × 20 columns


In [97]:
# Create isAlone first so it keeps its position ahead of FamilySize.
source['isAlone'] = 0
# Family size = siblings/spouses + parents/children + the passenger.
source['FamilySize'] = source['SibSp'].add(source['Parch']).add(1)
# A family of one means the passenger travels alone.
source['isAlone'] = source['FamilySize'].eq(1).astype('int64')

source


Out[97]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare ... Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Title isChild isAlone FamilySize
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 ... 0 0 1 0 0 1 0.155882 0 0 2
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 ... 1 0 0 1 0 0 0.811765 0 0 2
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 ... 0 0 1 0 0 1 0.703704 0 1 1
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 ... 0 0 1 1 0 0 0.811765 0 0 2
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 ... 0 0 1 0 0 1 0.155882 0 1 1
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 ... 0 1 0 0 0 1 0.155882 0 1 1
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 ... 0 0 1 1 0 0 0.155882 0 1 1
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 ... 0 0 1 0 0 1 0.518519 1 0 5
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 ... 0 0 1 0 0 1 0.811765 0 0 3
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 ... 1 0 0 0 1 0 0.811765 1 0 2
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 ... 0 0 1 0 0 1 0.703704 1 0 3
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 ... 0 0 1 1 0 0 0.703704 0 1 1
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 ... 0 0 1 0 0 1 0.155882 0 1 1
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 ... 0 0 1 0 0 1 0.155882 0 0 7
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 ... 0 0 1 0 0 1 0.703704 1 1 1
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 ... 0 0 1 0 1 0 0.811765 0 1 1
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 ... 0 1 0 0 0 1 0.518519 1 0 6
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 ... 0 0 1 0 1 0 0.155882 0 1 1
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 ... 0 0 1 0 0 1 0.811765 0 0 2
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 ... 1 0 0 0 0 1 0.811765 0 1 1
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 ... 0 0 1 0 1 0 0.155882 0 1 1
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 ... 0 0 1 0 1 0 0.155882 0 1 1
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 ... 0 1 0 0 0 1 0.703704 1 1 1
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 ... 0 0 1 1 0 0 0.155882 0 1 1
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 ... 0 0 1 0 0 1 0.703704 1 0 5
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 ... 0 0 1 0 0 1 0.811765 0 0 7
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 ... 1 0 0 0 0 1 0.155882 0 1 1
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 ... 0 0 1 1 0 0 0.155882 0 0 6
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 ... 0 1 0 0 0 1 0.703704 0 1 1
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 ... 0 0 1 0 0 1 0.155882 0 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
570 571 1 2 Harris, Mr. George male 62.0 0 0 S.W./PP 752 10.5000 ... 0 0 1 0 1 0 0.155882 0 1 1
571 572 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) female 53.0 2 0 11769 51.4792 ... 0 0 1 1 0 0 0.811765 0 0 3
572 573 1 1 Flynn, Mr. John Irwin ("Irving") male 36.0 0 0 PC 17474 26.3875 ... 0 0 1 1 0 0 0.155882 0 1 1
573 574 1 3 Kelly, Miss. Mary female NaN 0 0 14312 7.7500 ... 0 1 0 0 0 1 0.703704 0 1 1
574 575 0 3 Rush, Mr. Alfred George John male 16.0 0 0 A/4. 20589 8.0500 ... 0 0 1 0 0 1 0.155882 1 1 1
575 576 0 3 Patchett, Mr. George male 19.0 0 0 358585 14.5000 ... 0 0 1 0 0 1 0.155882 0 1 1
576 577 1 2 Garside, Miss. Ethel female 34.0 0 0 243880 13.0000 ... 0 0 1 0 1 0 0.703704 0 1 1
577 578 1 1 Silvey, Mrs. William Baird (Alice Munger) female 39.0 1 0 13507 55.9000 ... 0 0 1 1 0 0 0.811765 0 0 2
578 579 0 3 Caram, Mrs. Joseph (Maria Elias) female NaN 1 0 2689 14.4583 ... 1 0 0 0 0 1 0.811765 0 0 2
579 580 1 3 Jussila, Mr. Eiriik male 32.0 0 0 STON/O 2. 3101286 7.9250 ... 0 0 1 0 0 1 0.155882 0 1 1
580 581 1 2 Christy, Miss. Julie Rachel female 25.0 1 1 237789 30.0000 ... 0 0 1 0 1 0 0.703704 0 0 3
581 582 1 1 Thayer, Mrs. John Borland (Marian Longstreth M... female 39.0 1 1 17421 110.8833 ... 1 0 0 1 0 0 0.811765 0 0 3
582 583 0 2 Downton, Mr. William James male 54.0 0 0 28403 26.0000 ... 0 0 1 0 1 0 0.155882 0 1 1
583 584 0 1 Ross, Mr. John Hugo male 36.0 0 0 13049 40.1250 ... 1 0 0 1 0 0 0.155882 0 1 1
584 585 0 3 Paulner, Mr. Uscher male NaN 0 0 3411 8.7125 ... 1 0 0 0 0 1 0.155882 0 1 1
585 586 1 1 Taussig, Miss. Ruth female 18.0 0 2 110413 79.6500 ... 0 0 1 1 0 0 0.703704 0 0 3
586 587 0 2 Jarvis, Mr. John Denzil male 47.0 0 0 237565 15.0000 ... 0 0 1 0 1 0 0.155882 0 1 1
587 588 1 1 Frolicher-Stehli, Mr. Maxmillian male 60.0 1 1 13567 79.2000 ... 1 0 0 1 0 0 0.155882 0 0 3
588 589 0 3 Gilinski, Mr. Eliezer male 22.0 0 0 14973 8.0500 ... 0 0 1 0 0 1 0.155882 0 1 1
589 590 0 3 Murdlin, Mr. Joseph male NaN 0 0 A./5. 3235 8.0500 ... 0 0 1 0 0 1 0.155882 0 1 1
590 591 0 3 Rintamaki, Mr. Matti male 35.0 0 0 STON/O 2. 3101273 7.1250 ... 0 0 1 0 0 1 0.155882 0 1 1
591 592 1 1 Stephenson, Mrs. Walter Bertram (Martha Eustis) female 52.0 1 0 36947 78.2667 ... 1 0 0 1 0 0 0.811765 0 0 2
592 593 0 3 Elsbury, Mr. William James male 47.0 0 0 A/5 3902 7.2500 ... 0 0 1 0 0 1 0.155882 0 1 1
593 594 0 3 Bourke, Miss. Mary female NaN 0 2 364848 7.7500 ... 0 1 0 0 0 1 0.703704 0 0 3
594 595 0 2 Chapman, Mr. John Henry male 37.0 1 0 SC/AH 29037 26.0000 ... 0 0 1 0 1 0 0.155882 0 0 2
595 596 0 3 Van Impe, Mr. Jean Baptiste male 36.0 1 1 345773 24.1500 ... 0 0 1 0 0 1 0.155882 0 0 3
596 597 1 2 Leitch, Miss. Jessie Wills female NaN 0 0 248727 33.0000 ... 0 0 1 0 1 0 0.703704 0 1 1
597 598 0 3 Johnson, Mr. Alfred male 49.0 0 0 LINE 0.0000 ... 0 0 1 0 0 1 0.155882 0 1 1
598 599 0 3 Boulos, Mr. Hanna male NaN 0 0 2664 7.2250 ... 1 0 0 0 0 1 0.155882 0 1 1
599 600 1 1 Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan") male 49.0 1 0 PC 17485 56.9292 ... 1 0 0 1 0 0 1.000000 0 0 2

600 rows × 22 columns


In [99]:
from sklearn.ensemble import RandomForestRegressor
def set_missing_ages(df, features, target):
    """Fill missing values of ``target`` using a random-forest regression.

    A ``RandomForestRegressor`` is fitted on the rows where ``target`` is
    present, using the remaining ``features`` columns as predictors, and its
    predictions are written into the rows where ``target`` is null.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to impute. It is modified in place.
    features : list of str
        Column names used by the model. ``target`` may appear anywhere in
        this list (or be absent); every other name is used as a predictor,
        in the order given.
    target : str
        Name of the column whose missing values are filled.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    # Select the predictors by name instead of relying on ``target`` being
    # the first entry of ``features``.
    predictors = [name for name in features if name != target]

    missing = df[target].isnull()
    if not missing.any():
        # Nothing to impute, so skip the model fit entirely.
        return df
    known = ~missing

    # ``.values`` replaces ``DataFrame.as_matrix()``, which was removed in
    # pandas 1.0.
    X_known = df.loc[known, predictors].values
    y_known = df.loc[known, target].values
    X_missing = df.loc[missing, predictors].values

    rfr = RandomForestRegressor(
        random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X_known, y_known)
    df.loc[missing, target] = rfr.predict(X_missing)
    return df
# Impute missing ages from the numeric passenger columns; set_missing_ages
# writes the predictions into `source` itself and returns it.
source = set_missing_ages(source, ['Age', 'Fare', 'Parch', 'SibSp', 'Pclass'], 'Age')
# Encode Sex as 1 for 'male' and 0 otherwise. An existing 1 is also accepted
# so that re-running this cell on the already-encoded column does not reset
# every row to 0.
source['Sex'] = source['Sex'].map(lambda x: 1 if x in ('male', 1) else 0)
# Keep only the columns whose names match the feature pattern.
source2 = source.filter(regex='isChild|isAlone|Title|Age|SibSp|Parch|Fare|Embarked_.*|Sex|Pclass_.*')
source2


Out[99]:
Sex Age SibSp Parch Fare Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Title isChild isAlone
0 1 22.000000 1 0 7.2500 0 0 1 0 0 1 0.155882 0 0
1 0 38.000000 1 0 71.2833 1 0 0 1 0 0 0.811765 0 0
2 0 26.000000 0 0 7.9250 0 0 1 0 0 1 0.703704 0 1
3 0 35.000000 1 0 53.1000 0 0 1 1 0 0 0.811765 0 0
4 1 35.000000 0 0 8.0500 0 0 1 0 0 1 0.155882 0 1
5 1 31.459834 0 0 8.4583 0 1 0 0 0 1 0.155882 0 1
6 1 54.000000 0 0 51.8625 0 0 1 1 0 0 0.155882 0 1
7 1 2.000000 3 1 21.0750 0 0 1 0 0 1 0.518519 1 0
8 0 27.000000 0 2 11.1333 0 0 1 0 0 1 0.811765 0 0
9 0 14.000000 1 0 30.0708 1 0 0 0 1 0 0.811765 1 0
10 0 4.000000 1 1 16.7000 0 0 1 0 0 1 0.703704 1 0
11 0 58.000000 0 0 26.5500 0 0 1 1 0 0 0.703704 0 1
12 1 20.000000 0 0 8.0500 0 0 1 0 0 1 0.155882 0 1
13 1 39.000000 1 5 31.2750 0 0 1 0 0 1 0.155882 0 0
14 0 14.000000 0 0 7.8542 0 0 1 0 0 1 0.703704 1 1
15 0 55.000000 0 0 16.0000 0 0 1 0 1 0 0.811765 0 1
16 1 2.000000 4 1 29.1250 0 1 0 0 0 1 0.518519 1 0
17 1 31.621829 0 0 13.0000 0 0 1 0 1 0 0.155882 0 1
18 0 31.000000 1 0 18.0000 0 0 1 0 0 1 0.811765 0 0
19 0 32.150027 0 0 7.2250 1 0 0 0 0 1 0.811765 0 1
20 1 35.000000 0 0 26.0000 0 0 1 0 1 0 0.155882 0 1
21 1 34.000000 0 0 13.0000 0 0 1 0 1 0 0.155882 0 1
22 0 15.000000 0 0 8.0292 0 1 0 0 0 1 0.703704 1 1
23 1 28.000000 0 0 35.5000 0 0 1 1 0 0 0.155882 0 1
24 0 8.000000 3 1 21.0750 0 0 1 0 0 1 0.703704 1 0
25 0 38.000000 1 5 31.3875 0 0 1 0 0 1 0.811765 0 0
26 1 32.150027 0 0 7.2250 1 0 0 0 0 1 0.155882 0 1
27 1 19.000000 3 2 263.0000 0 0 1 1 0 0 0.155882 0 0
28 0 22.625180 0 0 7.8792 0 1 0 0 0 1 0.703704 0 1
29 1 29.032518 0 0 7.8958 0 0 1 0 0 1 0.155882 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
570 1 62.000000 0 0 10.5000 0 0 1 0 1 0 0.155882 0 1
571 0 53.000000 2 0 51.4792 0 0 1 1 0 0 0.811765 0 0
572 1 36.000000 0 0 26.3875 0 0 1 1 0 0 0.155882 0 1
573 0 38.173957 0 0 7.7500 0 1 0 0 0 1 0.703704 0 1
574 1 16.000000 0 0 8.0500 0 0 1 0 0 1 0.155882 1 1
575 1 19.000000 0 0 14.5000 0 0 1 0 0 1 0.155882 0 1
576 0 34.000000 0 0 13.0000 0 0 1 0 1 0 0.703704 0 1
577 0 39.000000 1 0 55.9000 0 0 1 1 0 0 0.811765 0 0
578 0 20.313637 1 0 14.4583 1 0 0 0 0 1 0.811765 0 0
579 1 32.000000 0 0 7.9250 0 0 1 0 0 1 0.155882 0 1
580 0 25.000000 1 1 30.0000 0 0 1 0 1 0 0.703704 0 0
581 0 39.000000 1 1 110.8833 1 0 0 1 0 0 0.811765 0 0
582 1 54.000000 0 0 26.0000 0 0 1 0 1 0 0.155882 0 1
583 1 36.000000 0 0 40.1250 1 0 0 1 0 0 0.155882 0 1
584 1 28.018904 0 0 8.7125 1 0 0 0 0 1 0.155882 0 1
585 0 18.000000 0 2 79.6500 0 0 1 1 0 0 0.703704 0 0
586 1 47.000000 0 0 15.0000 0 0 1 0 1 0 0.155882 0 1
587 1 60.000000 1 1 79.2000 1 0 0 1 0 0 0.155882 0 0
588 1 22.000000 0 0 8.0500 0 0 1 0 0 1 0.155882 0 1
589 1 28.839197 0 0 8.0500 0 0 1 0 0 1 0.155882 0 1
590 1 35.000000 0 0 7.1250 0 0 1 0 0 1 0.155882 0 1
591 0 52.000000 1 0 78.2667 1 0 0 1 0 0 0.811765 0 0
592 1 47.000000 0 0 7.2500 0 0 1 0 0 1 0.155882 0 1
593 0 27.694073 0 2 7.7500 0 1 0 0 0 1 0.703704 0 0
594 1 37.000000 1 0 26.0000 0 0 1 0 1 0 0.155882 0 0
595 1 36.000000 1 1 24.1500 0 0 1 0 0 1 0.155882 0 0
596 0 26.601009 0 0 33.0000 0 0 1 0 1 0 0.703704 0 1
597 1 49.000000 0 0 0.0000 0 0 1 0 0 1 0.155882 0 1
598 1 32.150027 0 0 7.2250 1 0 0 0 0 1 0.155882 0 1
599 1 49.000000 1 0 56.9292 1 0 0 1 0 0 1.000000 0 0

600 rows × 14 columns


In [101]:
from sklearn import preprocessing
# Fit a min-max scaler on source2 and display the rescaled feature matrix
# (the result is only shown, not stored).
preprocessing.MinMaxScaler().fit(source2).transform(source2)


Out[101]:
array([[ 1.        ,  0.3024911 ,  0.125     , ...,  0.15588235,
         0.        ,  0.        ],
       [ 0.        ,  0.53024911,  0.125     , ...,  0.81176471,
         0.        ,  0.        ],
       [ 0.        ,  0.3594306 ,  0.        , ...,  0.7037037 ,
         0.        ,  1.        ],
       ..., 
       [ 1.        ,  0.68683274,  0.        , ...,  0.15588235,
         0.        ,  1.        ],
       [ 1.        ,  0.44697547,  0.        , ...,  0.15588235,
         0.        ,  1.        ],
       [ 1.        ,  0.68683274,  0.125     , ...,  1.        ,
         0.        ,  0.        ]])

In [ ]: