In [105]:
# coding=utf-8
import pandas as pd
import numpy as np
train_df = pd.read_csv('./data/train.csv', index_col=False).head(540)
valid_df = pd.read_csv('./data/train.csv', index_col=False)[540:700]
test_df = pd.read_csv('./data/train.csv', index_col=False).tail(191)
target_df = pd.read_csv('./data/test.csv', index_col=False)
def set_missing_ages(df, features, target):
# 根据所坐舱位等数字讯息推断年龄
target_df = df[features]
known = target_df[target_df[target].notnull()].as_matrix()
unknown = target_df[target_df[target].isnull()].as_matrix()
y = known[:, 0]
X = known[:, 1:]
if len(unknown):
rfr = RandomForestRegressor(
random_state=0, n_estimators=2000, n_jobs=-1)
rfr.fit(X, y)
predicted = rfr.predict(unknown[:, 1::])
df.loc[(df[target].isnull()), target] = predicted
return df
def set_missing_ages_2(df, feature):
# 根据姓名求年龄中位数
df['Age'].fillna(-1, inplace=True)
titles = df['Name'].unique()
medians = dict()
for title in titles:
median = df.Age[(df["Age"] != -1) & (df['Name'] == title)].median()
medians[title] = median
for index, row in df.iterrows():
if row['Age'] == -1:
df.loc[index, 'Age'] = medians[row['Name']]
return df
# Name Ticket Cabin
def title_keymap_generate(target):
titles = target.Name.str.extract(' ([A-Za-z]+)\.', expand=False)
km = titles.unique()
survived_rate = pd.Series(0.0, index=km)
for title in km:
survived_total = target.Survived[titles == title].value_counts()
if 1 in survived_total:
survived_rate[title] = float(survived_total[1]) / float(sum(survived_total))
else:
survived_rate[title] = 0
return survived_rate
def sibsp_map_generate(source):
km = source['SibSp'].unique()
sib_rate = pd.Series(0.0, index=km)
for sib in km:
survived_total = source.Survived[source['SibSp'] == sib].value_counts()
if 1 in survived_total:
sib_rate[sib] = float(survived_total[1]) / float(sum(survived_total))
else:
sib_rate[sib] = 0
return sib_rate
survived_rate = title_keymap_generate(train_df)
sib_rate = sibsp_map_generate(train_df)
source = train_df
source.Cabin = source.Cabin.str.extract('([A-Z])\d+', expand=False)
source.Cabin.fillna('NULL', inplace=True)
source.Fare.fillna(source['Fare'].dropna().median(), inplace=True)
dummies_embarked = pd.get_dummies(source['Embarked'], prefix='Embarked')
dummies_cabin = pd.get_dummies(source['Cabin'], prefix='Cabin')
dummies_Pclass = pd.get_dummies(source['Pclass'], prefix='Pclass')
source = pd.concat([source, dummies_embarked, dummies_Pclass, dummies_cabin], axis=1)
source['Title'] = source.Name.str.extract(' ([A-Za-z]+)\.', expand=False).map(survived_rate, na_action=None)
source['Title'].fillna(0.5, inplace=True)
source['Sex'] = source['Sex'].map(lambda x: 1 if x == 'male' else 0)
source['isChild'] = source['Age'].map(lambda x: 1 if x <= 16 else 0)
source['isAlone'] = 0
source['FamilySize'] = source['SibSp'] + source['Parch'] + 1
source['SibSp'] = source['SibSp'].map(sib_rate, na_action=None)
source['SibSp'].fillna(0.5, inplace=True)
source.loc[source['FamilySize'] == 1, 'isAlone'] = 1
source = set_missing_ages(source, ['Age', 'Fare', 'Parch', 'SibSp', 'Pclass'], 'Age')
source = source.filter(regex='isChild|isAlone|Title|Age|SibSp|Parch|Fare|Embarked_.*|Cabin_.*|Sex|Pclass_.*')
source.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 22 columns):
Sex 540 non-null int64
Age 540 non-null float64
SibSp 540 non-null float64
Parch 540 non-null int64
Fare 540 non-null float64
Embarked_C 540 non-null uint8
Embarked_Q 540 non-null uint8
Embarked_S 540 non-null uint8
Pclass_1 540 non-null uint8
Pclass_2 540 non-null uint8
Pclass_3 540 non-null uint8
Cabin_A 540 non-null uint8
Cabin_B 540 non-null uint8
Cabin_C 540 non-null uint8
Cabin_D 540 non-null uint8
Cabin_E 540 non-null uint8
Cabin_F 540 non-null uint8
Cabin_G 540 non-null uint8
Cabin_NULL 540 non-null uint8
Title 540 non-null float64
isChild 540 non-null int64
isAlone 540 non-null int64
dtypes: float64(4), int64(4), uint8(14)
memory usage: 41.2 KB
In [91]:
source.Fare.fillna(source['Fare'].dropna().median())
source
Out[91]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
NaN
S
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
C85
C
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
NaN
S
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
C123
S
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
NaN
S
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
6
7
0
1
McCarthy, Mr. Timothy J
male
54.0
0
0
17463
51.8625
E46
S
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.0
3
1
349909
21.0750
NaN
S
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.0
0
2
347742
11.1333
NaN
S
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.0
1
0
237736
30.0708
NaN
C
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.0
1
1
PP 9549
16.7000
G6
S
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.0
0
0
113783
26.5500
C103
S
12
13
0
3
Saundercock, Mr. William Henry
male
20.0
0
0
A/5. 2151
8.0500
NaN
S
13
14
0
3
Andersson, Mr. Anders Johan
male
39.0
1
5
347082
31.2750
NaN
S
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.0
0
0
350406
7.8542
NaN
S
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.0
0
0
248706
16.0000
NaN
S
16
17
0
3
Rice, Master. Eugene
male
2.0
4
1
382652
29.1250
NaN
Q
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.0
1
0
345763
18.0000
NaN
S
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
20
21
0
2
Fynney, Mr. Joseph J
male
35.0
0
0
239865
26.0000
NaN
S
21
22
1
2
Beesley, Mr. Lawrence
male
34.0
0
0
248698
13.0000
D56
S
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.0
0
0
330923
8.0292
NaN
Q
23
24
1
1
Sloper, Mr. William Thompson
male
28.0
0
0
113788
35.5000
A6
S
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.0
3
1
349909
21.0750
NaN
S
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.0
1
5
347077
31.3875
NaN
S
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.0
3
2
19950
263.0000
C23 C25 C27
S
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
...
...
...
...
...
...
...
...
...
...
...
...
...
570
571
1
2
Harris, Mr. George
male
62.0
0
0
S.W./PP 752
10.5000
NaN
S
571
572
1
1
Appleton, Mrs. Edward Dale (Charlotte Lamson)
female
53.0
2
0
11769
51.4792
C101
S
572
573
1
1
Flynn, Mr. John Irwin ("Irving")
male
36.0
0
0
PC 17474
26.3875
E25
S
573
574
1
3
Kelly, Miss. Mary
female
NaN
0
0
14312
7.7500
NaN
Q
574
575
0
3
Rush, Mr. Alfred George John
male
16.0
0
0
A/4. 20589
8.0500
NaN
S
575
576
0
3
Patchett, Mr. George
male
19.0
0
0
358585
14.5000
NaN
S
576
577
1
2
Garside, Miss. Ethel
female
34.0
0
0
243880
13.0000
NaN
S
577
578
1
1
Silvey, Mrs. William Baird (Alice Munger)
female
39.0
1
0
13507
55.9000
E44
S
578
579
0
3
Caram, Mrs. Joseph (Maria Elias)
female
NaN
1
0
2689
14.4583
NaN
C
579
580
1
3
Jussila, Mr. Eiriik
male
32.0
0
0
STON/O 2. 3101286
7.9250
NaN
S
580
581
1
2
Christy, Miss. Julie Rachel
female
25.0
1
1
237789
30.0000
NaN
S
581
582
1
1
Thayer, Mrs. John Borland (Marian Longstreth M...
female
39.0
1
1
17421
110.8833
C68
C
582
583
0
2
Downton, Mr. William James
male
54.0
0
0
28403
26.0000
NaN
S
583
584
0
1
Ross, Mr. John Hugo
male
36.0
0
0
13049
40.1250
A10
C
584
585
0
3
Paulner, Mr. Uscher
male
NaN
0
0
3411
8.7125
NaN
C
585
586
1
1
Taussig, Miss. Ruth
female
18.0
0
2
110413
79.6500
E68
S
586
587
0
2
Jarvis, Mr. John Denzil
male
47.0
0
0
237565
15.0000
NaN
S
587
588
1
1
Frolicher-Stehli, Mr. Maxmillian
male
60.0
1
1
13567
79.2000
B41
C
588
589
0
3
Gilinski, Mr. Eliezer
male
22.0
0
0
14973
8.0500
NaN
S
589
590
0
3
Murdlin, Mr. Joseph
male
NaN
0
0
A./5. 3235
8.0500
NaN
S
590
591
0
3
Rintamaki, Mr. Matti
male
35.0
0
0
STON/O 2. 3101273
7.1250
NaN
S
591
592
1
1
Stephenson, Mrs. Walter Bertram (Martha Eustis)
female
52.0
1
0
36947
78.2667
D20
C
592
593
0
3
Elsbury, Mr. William James
male
47.0
0
0
A/5 3902
7.2500
NaN
S
593
594
0
3
Bourke, Miss. Mary
female
NaN
0
2
364848
7.7500
NaN
Q
594
595
0
2
Chapman, Mr. John Henry
male
37.0
1
0
SC/AH 29037
26.0000
NaN
S
595
596
0
3
Van Impe, Mr. Jean Baptiste
male
36.0
1
1
345773
24.1500
NaN
S
596
597
1
2
Leitch, Miss. Jessie Wills
female
NaN
0
0
248727
33.0000
NaN
S
597
598
0
3
Johnson, Mr. Alfred
male
49.0
0
0
LINE
0.0000
NaN
S
598
599
0
3
Boulos, Mr. Hanna
male
NaN
0
0
2664
7.2250
NaN
C
599
600
1
1
Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")
male
49.0
1
0
PC 17485
56.9292
A20
C
600 rows × 12 columns
In [92]:
dummies_embarked = pd.get_dummies(source['Embarked'], prefix='Embarked')
# dummies_sex = pd.get_dummies(source['Sex'], prefix='Sex')
dummies_Pclass = pd.get_dummies(source['Pclass'], prefix='Pclass')
source = pd.concat([source, dummies_embarked, dummies_Pclass], axis=1)
source
Out[92]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
NaN
S
0
0
1
0
0
1
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
C85
C
1
0
0
1
0
0
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
NaN
S
0
0
1
0
0
1
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
C123
S
0
0
1
1
0
0
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
NaN
S
0
0
1
0
0
1
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
0
1
0
0
0
1
6
7
0
1
McCarthy, Mr. Timothy J
male
54.0
0
0
17463
51.8625
E46
S
0
0
1
1
0
0
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.0
3
1
349909
21.0750
NaN
S
0
0
1
0
0
1
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.0
0
2
347742
11.1333
NaN
S
0
0
1
0
0
1
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.0
1
0
237736
30.0708
NaN
C
1
0
0
0
1
0
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.0
1
1
PP 9549
16.7000
G6
S
0
0
1
0
0
1
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.0
0
0
113783
26.5500
C103
S
0
0
1
1
0
0
12
13
0
3
Saundercock, Mr. William Henry
male
20.0
0
0
A/5. 2151
8.0500
NaN
S
0
0
1
0
0
1
13
14
0
3
Andersson, Mr. Anders Johan
male
39.0
1
5
347082
31.2750
NaN
S
0
0
1
0
0
1
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.0
0
0
350406
7.8542
NaN
S
0
0
1
0
0
1
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.0
0
0
248706
16.0000
NaN
S
0
0
1
0
1
0
16
17
0
3
Rice, Master. Eugene
male
2.0
4
1
382652
29.1250
NaN
Q
0
1
0
0
0
1
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
0
0
1
0
1
0
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.0
1
0
345763
18.0000
NaN
S
0
0
1
0
0
1
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
1
0
0
0
0
1
20
21
0
2
Fynney, Mr. Joseph J
male
35.0
0
0
239865
26.0000
NaN
S
0
0
1
0
1
0
21
22
1
2
Beesley, Mr. Lawrence
male
34.0
0
0
248698
13.0000
D56
S
0
0
1
0
1
0
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.0
0
0
330923
8.0292
NaN
Q
0
1
0
0
0
1
23
24
1
1
Sloper, Mr. William Thompson
male
28.0
0
0
113788
35.5000
A6
S
0
0
1
1
0
0
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.0
3
1
349909
21.0750
NaN
S
0
0
1
0
0
1
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.0
1
5
347077
31.3875
NaN
S
0
0
1
0
0
1
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
1
0
0
0
0
1
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.0
3
2
19950
263.0000
C23 C25 C27
S
0
0
1
1
0
0
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
0
1
0
0
0
1
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
0
0
1
0
0
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
570
571
1
2
Harris, Mr. George
male
62.0
0
0
S.W./PP 752
10.5000
NaN
S
0
0
1
0
1
0
571
572
1
1
Appleton, Mrs. Edward Dale (Charlotte Lamson)
female
53.0
2
0
11769
51.4792
C101
S
0
0
1
1
0
0
572
573
1
1
Flynn, Mr. John Irwin ("Irving")
male
36.0
0
0
PC 17474
26.3875
E25
S
0
0
1
1
0
0
573
574
1
3
Kelly, Miss. Mary
female
NaN
0
0
14312
7.7500
NaN
Q
0
1
0
0
0
1
574
575
0
3
Rush, Mr. Alfred George John
male
16.0
0
0
A/4. 20589
8.0500
NaN
S
0
0
1
0
0
1
575
576
0
3
Patchett, Mr. George
male
19.0
0
0
358585
14.5000
NaN
S
0
0
1
0
0
1
576
577
1
2
Garside, Miss. Ethel
female
34.0
0
0
243880
13.0000
NaN
S
0
0
1
0
1
0
577
578
1
1
Silvey, Mrs. William Baird (Alice Munger)
female
39.0
1
0
13507
55.9000
E44
S
0
0
1
1
0
0
578
579
0
3
Caram, Mrs. Joseph (Maria Elias)
female
NaN
1
0
2689
14.4583
NaN
C
1
0
0
0
0
1
579
580
1
3
Jussila, Mr. Eiriik
male
32.0
0
0
STON/O 2. 3101286
7.9250
NaN
S
0
0
1
0
0
1
580
581
1
2
Christy, Miss. Julie Rachel
female
25.0
1
1
237789
30.0000
NaN
S
0
0
1
0
1
0
581
582
1
1
Thayer, Mrs. John Borland (Marian Longstreth M...
female
39.0
1
1
17421
110.8833
C68
C
1
0
0
1
0
0
582
583
0
2
Downton, Mr. William James
male
54.0
0
0
28403
26.0000
NaN
S
0
0
1
0
1
0
583
584
0
1
Ross, Mr. John Hugo
male
36.0
0
0
13049
40.1250
A10
C
1
0
0
1
0
0
584
585
0
3
Paulner, Mr. Uscher
male
NaN
0
0
3411
8.7125
NaN
C
1
0
0
0
0
1
585
586
1
1
Taussig, Miss. Ruth
female
18.0
0
2
110413
79.6500
E68
S
0
0
1
1
0
0
586
587
0
2
Jarvis, Mr. John Denzil
male
47.0
0
0
237565
15.0000
NaN
S
0
0
1
0
1
0
587
588
1
1
Frolicher-Stehli, Mr. Maxmillian
male
60.0
1
1
13567
79.2000
B41
C
1
0
0
1
0
0
588
589
0
3
Gilinski, Mr. Eliezer
male
22.0
0
0
14973
8.0500
NaN
S
0
0
1
0
0
1
589
590
0
3
Murdlin, Mr. Joseph
male
NaN
0
0
A./5. 3235
8.0500
NaN
S
0
0
1
0
0
1
590
591
0
3
Rintamaki, Mr. Matti
male
35.0
0
0
STON/O 2. 3101273
7.1250
NaN
S
0
0
1
0
0
1
591
592
1
1
Stephenson, Mrs. Walter Bertram (Martha Eustis)
female
52.0
1
0
36947
78.2667
D20
C
1
0
0
1
0
0
592
593
0
3
Elsbury, Mr. William James
male
47.0
0
0
A/5 3902
7.2500
NaN
S
0
0
1
0
0
1
593
594
0
3
Bourke, Miss. Mary
female
NaN
0
2
364848
7.7500
NaN
Q
0
1
0
0
0
1
594
595
0
2
Chapman, Mr. John Henry
male
37.0
1
0
SC/AH 29037
26.0000
NaN
S
0
0
1
0
1
0
595
596
0
3
Van Impe, Mr. Jean Baptiste
male
36.0
1
1
345773
24.1500
NaN
S
0
0
1
0
0
1
596
597
1
2
Leitch, Miss. Jessie Wills
female
NaN
0
0
248727
33.0000
NaN
S
0
0
1
0
1
0
597
598
0
3
Johnson, Mr. Alfred
male
49.0
0
0
LINE
0.0000
NaN
S
0
0
1
0
0
1
598
599
0
3
Boulos, Mr. Hanna
male
NaN
0
0
2664
7.2250
NaN
C
1
0
0
0
0
1
599
600
1
1
Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")
male
49.0
1
0
PC 17485
56.9292
A20
C
1
0
0
1
0
0
600 rows × 18 columns
In [93]:
source['Title'] = source.Name.str.extract(' ([A-Za-z]+)\.', expand=False)
source['Title']
Out[93]:
0 Mr
1 Mrs
2 Miss
3 Mrs
4 Mr
5 Mr
6 Mr
7 Master
8 Mrs
9 Mrs
10 Miss
11 Miss
12 Mr
13 Mr
14 Miss
15 Mrs
16 Master
17 Mr
18 Mrs
19 Mrs
20 Mr
21 Mr
22 Miss
23 Mr
24 Miss
25 Mrs
26 Mr
27 Mr
28 Miss
29 Mr
...
570 Mr
571 Mrs
572 Mr
573 Miss
574 Mr
575 Mr
576 Miss
577 Mrs
578 Mrs
579 Mr
580 Miss
581 Mrs
582 Mr
583 Mr
584 Mr
585 Miss
586 Mr
587 Mr
588 Mr
589 Mr
590 Mr
591 Mrs
592 Mr
593 Miss
594 Mr
595 Mr
596 Miss
597 Mr
598 Mr
599 Sir
Name: Title, Length: 600, dtype: object
In [1]:
source.Title.unique()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-e6ed9908333a> in <module>()
----> 1 source.Title.unique()
2 source['agepower'] = source['Age'] ^ 2
NameError: name 'source' is not defined
In [95]:
import matplotlib.pyplot as plt
%matplotlib inline
fig = plt.figure(figsize=(15,6))
km = source['Title'].unique()
survived_rate = pd.Series(0.0, index=km)
for title in km:
title_df = source.Survived[source['Title'] == title]
survived_total = source.Survived[source['Title'] == title].value_counts()
if 1 in survived_total:
survived_rate[title] = float(survived_total[1]) / float(sum(survived_total))
else:
survived_rate[title] = 0
source['Title'] = source['Title'].map(survived_rate)
source
Out[95]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Title
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
NaN
S
0
0
1
0
0
1
0.155882
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
C85
C
1
0
0
1
0
0
0.811765
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
NaN
S
0
0
1
0
0
1
0.703704
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
C123
S
0
0
1
1
0
0
0.811765
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
NaN
S
0
0
1
0
0
1
0.155882
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
0
1
0
0
0
1
0.155882
6
7
0
1
McCarthy, Mr. Timothy J
male
54.0
0
0
17463
51.8625
E46
S
0
0
1
1
0
0
0.155882
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.0
3
1
349909
21.0750
NaN
S
0
0
1
0
0
1
0.518519
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.0
0
2
347742
11.1333
NaN
S
0
0
1
0
0
1
0.811765
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.0
1
0
237736
30.0708
NaN
C
1
0
0
0
1
0
0.811765
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.0
1
1
PP 9549
16.7000
G6
S
0
0
1
0
0
1
0.703704
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.0
0
0
113783
26.5500
C103
S
0
0
1
1
0
0
0.703704
12
13
0
3
Saundercock, Mr. William Henry
male
20.0
0
0
A/5. 2151
8.0500
NaN
S
0
0
1
0
0
1
0.155882
13
14
0
3
Andersson, Mr. Anders Johan
male
39.0
1
5
347082
31.2750
NaN
S
0
0
1
0
0
1
0.155882
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.0
0
0
350406
7.8542
NaN
S
0
0
1
0
0
1
0.703704
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.0
0
0
248706
16.0000
NaN
S
0
0
1
0
1
0
0.811765
16
17
0
3
Rice, Master. Eugene
male
2.0
4
1
382652
29.1250
NaN
Q
0
1
0
0
0
1
0.518519
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
0
0
1
0
1
0
0.155882
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.0
1
0
345763
18.0000
NaN
S
0
0
1
0
0
1
0.811765
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
1
0
0
0
0
1
0.811765
20
21
0
2
Fynney, Mr. Joseph J
male
35.0
0
0
239865
26.0000
NaN
S
0
0
1
0
1
0
0.155882
21
22
1
2
Beesley, Mr. Lawrence
male
34.0
0
0
248698
13.0000
D56
S
0
0
1
0
1
0
0.155882
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.0
0
0
330923
8.0292
NaN
Q
0
1
0
0
0
1
0.703704
23
24
1
1
Sloper, Mr. William Thompson
male
28.0
0
0
113788
35.5000
A6
S
0
0
1
1
0
0
0.155882
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.0
3
1
349909
21.0750
NaN
S
0
0
1
0
0
1
0.703704
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.0
1
5
347077
31.3875
NaN
S
0
0
1
0
0
1
0.811765
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
1
0
0
0
0
1
0.155882
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.0
3
2
19950
263.0000
C23 C25 C27
S
0
0
1
1
0
0
0.155882
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
0
1
0
0
0
1
0.703704
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
0
0
1
0
0
1
0.155882
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
570
571
1
2
Harris, Mr. George
male
62.0
0
0
S.W./PP 752
10.5000
NaN
S
0
0
1
0
1
0
0.155882
571
572
1
1
Appleton, Mrs. Edward Dale (Charlotte Lamson)
female
53.0
2
0
11769
51.4792
C101
S
0
0
1
1
0
0
0.811765
572
573
1
1
Flynn, Mr. John Irwin ("Irving")
male
36.0
0
0
PC 17474
26.3875
E25
S
0
0
1
1
0
0
0.155882
573
574
1
3
Kelly, Miss. Mary
female
NaN
0
0
14312
7.7500
NaN
Q
0
1
0
0
0
1
0.703704
574
575
0
3
Rush, Mr. Alfred George John
male
16.0
0
0
A/4. 20589
8.0500
NaN
S
0
0
1
0
0
1
0.155882
575
576
0
3
Patchett, Mr. George
male
19.0
0
0
358585
14.5000
NaN
S
0
0
1
0
0
1
0.155882
576
577
1
2
Garside, Miss. Ethel
female
34.0
0
0
243880
13.0000
NaN
S
0
0
1
0
1
0
0.703704
577
578
1
1
Silvey, Mrs. William Baird (Alice Munger)
female
39.0
1
0
13507
55.9000
E44
S
0
0
1
1
0
0
0.811765
578
579
0
3
Caram, Mrs. Joseph (Maria Elias)
female
NaN
1
0
2689
14.4583
NaN
C
1
0
0
0
0
1
0.811765
579
580
1
3
Jussila, Mr. Eiriik
male
32.0
0
0
STON/O 2. 3101286
7.9250
NaN
S
0
0
1
0
0
1
0.155882
580
581
1
2
Christy, Miss. Julie Rachel
female
25.0
1
1
237789
30.0000
NaN
S
0
0
1
0
1
0
0.703704
581
582
1
1
Thayer, Mrs. John Borland (Marian Longstreth M...
female
39.0
1
1
17421
110.8833
C68
C
1
0
0
1
0
0
0.811765
582
583
0
2
Downton, Mr. William James
male
54.0
0
0
28403
26.0000
NaN
S
0
0
1
0
1
0
0.155882
583
584
0
1
Ross, Mr. John Hugo
male
36.0
0
0
13049
40.1250
A10
C
1
0
0
1
0
0
0.155882
584
585
0
3
Paulner, Mr. Uscher
male
NaN
0
0
3411
8.7125
NaN
C
1
0
0
0
0
1
0.155882
585
586
1
1
Taussig, Miss. Ruth
female
18.0
0
2
110413
79.6500
E68
S
0
0
1
1
0
0
0.703704
586
587
0
2
Jarvis, Mr. John Denzil
male
47.0
0
0
237565
15.0000
NaN
S
0
0
1
0
1
0
0.155882
587
588
1
1
Frolicher-Stehli, Mr. Maxmillian
male
60.0
1
1
13567
79.2000
B41
C
1
0
0
1
0
0
0.155882
588
589
0
3
Gilinski, Mr. Eliezer
male
22.0
0
0
14973
8.0500
NaN
S
0
0
1
0
0
1
0.155882
589
590
0
3
Murdlin, Mr. Joseph
male
NaN
0
0
A./5. 3235
8.0500
NaN
S
0
0
1
0
0
1
0.155882
590
591
0
3
Rintamaki, Mr. Matti
male
35.0
0
0
STON/O 2. 3101273
7.1250
NaN
S
0
0
1
0
0
1
0.155882
591
592
1
1
Stephenson, Mrs. Walter Bertram (Martha Eustis)
female
52.0
1
0
36947
78.2667
D20
C
1
0
0
1
0
0
0.811765
592
593
0
3
Elsbury, Mr. William James
male
47.0
0
0
A/5 3902
7.2500
NaN
S
0
0
1
0
0
1
0.155882
593
594
0
3
Bourke, Miss. Mary
female
NaN
0
2
364848
7.7500
NaN
Q
0
1
0
0
0
1
0.703704
594
595
0
2
Chapman, Mr. John Henry
male
37.0
1
0
SC/AH 29037
26.0000
NaN
S
0
0
1
0
1
0
0.155882
595
596
0
3
Van Impe, Mr. Jean Baptiste
male
36.0
1
1
345773
24.1500
NaN
S
0
0
1
0
0
1
0.155882
596
597
1
2
Leitch, Miss. Jessie Wills
female
NaN
0
0
248727
33.0000
NaN
S
0
0
1
0
1
0
0.703704
597
598
0
3
Johnson, Mr. Alfred
male
49.0
0
0
LINE
0.0000
NaN
S
0
0
1
0
0
1
0.155882
598
599
0
3
Boulos, Mr. Hanna
male
NaN
0
0
2664
7.2250
NaN
C
1
0
0
0
0
1
0.155882
599
600
1
1
Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")
male
49.0
1
0
PC 17485
56.9292
A20
C
1
0
0
1
0
0
1.000000
600 rows × 19 columns
<matplotlib.figure.Figure at 0x10bcbb860>
In [96]:
source['isChild'] = source['Age'].map(lambda x: 1 if x <= 16 else 0)
source
Out[96]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Title
isChild
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
NaN
S
0
0
1
0
0
1
0.155882
0
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
C85
C
1
0
0
1
0
0
0.811765
0
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
NaN
S
0
0
1
0
0
1
0.703704
0
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
C123
S
0
0
1
1
0
0
0.811765
0
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
NaN
S
0
0
1
0
0
1
0.155882
0
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
0
1
0
0
0
1
0.155882
0
6
7
0
1
McCarthy, Mr. Timothy J
male
54.0
0
0
17463
51.8625
E46
S
0
0
1
1
0
0
0.155882
0
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.0
3
1
349909
21.0750
NaN
S
0
0
1
0
0
1
0.518519
1
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.0
0
2
347742
11.1333
NaN
S
0
0
1
0
0
1
0.811765
0
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.0
1
0
237736
30.0708
NaN
C
1
0
0
0
1
0
0.811765
1
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.0
1
1
PP 9549
16.7000
G6
S
0
0
1
0
0
1
0.703704
1
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.0
0
0
113783
26.5500
C103
S
0
0
1
1
0
0
0.703704
0
12
13
0
3
Saundercock, Mr. William Henry
male
20.0
0
0
A/5. 2151
8.0500
NaN
S
0
0
1
0
0
1
0.155882
0
13
14
0
3
Andersson, Mr. Anders Johan
male
39.0
1
5
347082
31.2750
NaN
S
0
0
1
0
0
1
0.155882
0
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.0
0
0
350406
7.8542
NaN
S
0
0
1
0
0
1
0.703704
1
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.0
0
0
248706
16.0000
NaN
S
0
0
1
0
1
0
0.811765
0
16
17
0
3
Rice, Master. Eugene
male
2.0
4
1
382652
29.1250
NaN
Q
0
1
0
0
0
1
0.518519
1
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
0
0
1
0
1
0
0.155882
0
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.0
1
0
345763
18.0000
NaN
S
0
0
1
0
0
1
0.811765
0
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
1
0
0
0
0
1
0.811765
0
20
21
0
2
Fynney, Mr. Joseph J
male
35.0
0
0
239865
26.0000
NaN
S
0
0
1
0
1
0
0.155882
0
21
22
1
2
Beesley, Mr. Lawrence
male
34.0
0
0
248698
13.0000
D56
S
0
0
1
0
1
0
0.155882
0
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.0
0
0
330923
8.0292
NaN
Q
0
1
0
0
0
1
0.703704
1
23
24
1
1
Sloper, Mr. William Thompson
male
28.0
0
0
113788
35.5000
A6
S
0
0
1
1
0
0
0.155882
0
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.0
3
1
349909
21.0750
NaN
S
0
0
1
0
0
1
0.703704
1
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.0
1
5
347077
31.3875
NaN
S
0
0
1
0
0
1
0.811765
0
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
1
0
0
0
0
1
0.155882
0
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.0
3
2
19950
263.0000
C23 C25 C27
S
0
0
1
1
0
0
0.155882
0
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
0
1
0
0
0
1
0.703704
0
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
0
0
1
0
0
1
0.155882
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
570
571
1
2
Harris, Mr. George
male
62.0
0
0
S.W./PP 752
10.5000
NaN
S
0
0
1
0
1
0
0.155882
0
571
572
1
1
Appleton, Mrs. Edward Dale (Charlotte Lamson)
female
53.0
2
0
11769
51.4792
C101
S
0
0
1
1
0
0
0.811765
0
572
573
1
1
Flynn, Mr. John Irwin ("Irving")
male
36.0
0
0
PC 17474
26.3875
E25
S
0
0
1
1
0
0
0.155882
0
573
574
1
3
Kelly, Miss. Mary
female
NaN
0
0
14312
7.7500
NaN
Q
0
1
0
0
0
1
0.703704
0
574
575
0
3
Rush, Mr. Alfred George John
male
16.0
0
0
A/4. 20589
8.0500
NaN
S
0
0
1
0
0
1
0.155882
1
575
576
0
3
Patchett, Mr. George
male
19.0
0
0
358585
14.5000
NaN
S
0
0
1
0
0
1
0.155882
0
576
577
1
2
Garside, Miss. Ethel
female
34.0
0
0
243880
13.0000
NaN
S
0
0
1
0
1
0
0.703704
0
577
578
1
1
Silvey, Mrs. William Baird (Alice Munger)
female
39.0
1
0
13507
55.9000
E44
S
0
0
1
1
0
0
0.811765
0
578
579
0
3
Caram, Mrs. Joseph (Maria Elias)
female
NaN
1
0
2689
14.4583
NaN
C
1
0
0
0
0
1
0.811765
0
579
580
1
3
Jussila, Mr. Eiriik
male
32.0
0
0
STON/O 2. 3101286
7.9250
NaN
S
0
0
1
0
0
1
0.155882
0
580
581
1
2
Christy, Miss. Julie Rachel
female
25.0
1
1
237789
30.0000
NaN
S
0
0
1
0
1
0
0.703704
0
581
582
1
1
Thayer, Mrs. John Borland (Marian Longstreth M...
female
39.0
1
1
17421
110.8833
C68
C
1
0
0
1
0
0
0.811765
0
582
583
0
2
Downton, Mr. William James
male
54.0
0
0
28403
26.0000
NaN
S
0
0
1
0
1
0
0.155882
0
583
584
0
1
Ross, Mr. John Hugo
male
36.0
0
0
13049
40.1250
A10
C
1
0
0
1
0
0
0.155882
0
584
585
0
3
Paulner, Mr. Uscher
male
NaN
0
0
3411
8.7125
NaN
C
1
0
0
0
0
1
0.155882
0
585
586
1
1
Taussig, Miss. Ruth
female
18.0
0
2
110413
79.6500
E68
S
0
0
1
1
0
0
0.703704
0
586
587
0
2
Jarvis, Mr. John Denzil
male
47.0
0
0
237565
15.0000
NaN
S
0
0
1
0
1
0
0.155882
0
587
588
1
1
Frolicher-Stehli, Mr. Maxmillian
male
60.0
1
1
13567
79.2000
B41
C
1
0
0
1
0
0
0.155882
0
588
589
0
3
Gilinski, Mr. Eliezer
male
22.0
0
0
14973
8.0500
NaN
S
0
0
1
0
0
1
0.155882
0
589
590
0
3
Murdlin, Mr. Joseph
male
NaN
0
0
A./5. 3235
8.0500
NaN
S
0
0
1
0
0
1
0.155882
0
590
591
0
3
Rintamaki, Mr. Matti
male
35.0
0
0
STON/O 2. 3101273
7.1250
NaN
S
0
0
1
0
0
1
0.155882
0
591
592
1
1
Stephenson, Mrs. Walter Bertram (Martha Eustis)
female
52.0
1
0
36947
78.2667
D20
C
1
0
0
1
0
0
0.811765
0
592
593
0
3
Elsbury, Mr. William James
male
47.0
0
0
A/5 3902
7.2500
NaN
S
0
0
1
0
0
1
0.155882
0
593
594
0
3
Bourke, Miss. Mary
female
NaN
0
2
364848
7.7500
NaN
Q
0
1
0
0
0
1
0.703704
0
594
595
0
2
Chapman, Mr. John Henry
male
37.0
1
0
SC/AH 29037
26.0000
NaN
S
0
0
1
0
1
0
0.155882
0
595
596
0
3
Van Impe, Mr. Jean Baptiste
male
36.0
1
1
345773
24.1500
NaN
S
0
0
1
0
0
1
0.155882
0
596
597
1
2
Leitch, Miss. Jessie Wills
female
NaN
0
0
248727
33.0000
NaN
S
0
0
1
0
1
0
0.703704
0
597
598
0
3
Johnson, Mr. Alfred
male
49.0
0
0
LINE
0.0000
NaN
S
0
0
1
0
0
1
0.155882
0
598
599
0
3
Boulos, Mr. Hanna
male
NaN
0
0
2664
7.2250
NaN
C
1
0
0
0
0
1
0.155882
0
599
600
1
1
Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")
male
49.0
1
0
PC 17485
56.9292
A20
C
1
0
0
1
0
0
1.000000
0
600 rows × 20 columns
In [97]:
source['isAlone'] = 0
source['FamilySize'] = source['SibSp'] + source['Parch'] + 1
source.loc[source['FamilySize'] == 1, 'isAlone'] = 1
source
Out[97]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
...
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Title
isChild
isAlone
FamilySize
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
...
0
0
1
0
0
1
0.155882
0
0
2
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
...
1
0
0
1
0
0
0.811765
0
0
2
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
...
0
0
1
0
0
1
0.703704
0
1
1
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
...
0
0
1
1
0
0
0.811765
0
0
2
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
...
0
0
1
0
0
1
0.155882
0
1
1
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
...
0
1
0
0
0
1
0.155882
0
1
1
6
7
0
1
McCarthy, Mr. Timothy J
male
54.0
0
0
17463
51.8625
...
0
0
1
1
0
0
0.155882
0
1
1
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.0
3
1
349909
21.0750
...
0
0
1
0
0
1
0.518519
1
0
5
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.0
0
2
347742
11.1333
...
0
0
1
0
0
1
0.811765
0
0
3
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.0
1
0
237736
30.0708
...
1
0
0
0
1
0
0.811765
1
0
2
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.0
1
1
PP 9549
16.7000
...
0
0
1
0
0
1
0.703704
1
0
3
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.0
0
0
113783
26.5500
...
0
0
1
1
0
0
0.703704
0
1
1
12
13
0
3
Saundercock, Mr. William Henry
male
20.0
0
0
A/5. 2151
8.0500
...
0
0
1
0
0
1
0.155882
0
1
1
13
14
0
3
Andersson, Mr. Anders Johan
male
39.0
1
5
347082
31.2750
...
0
0
1
0
0
1
0.155882
0
0
7
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.0
0
0
350406
7.8542
...
0
0
1
0
0
1
0.703704
1
1
1
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.0
0
0
248706
16.0000
...
0
0
1
0
1
0
0.811765
0
1
1
16
17
0
3
Rice, Master. Eugene
male
2.0
4
1
382652
29.1250
...
0
1
0
0
0
1
0.518519
1
0
6
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
...
0
0
1
0
1
0
0.155882
0
1
1
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.0
1
0
345763
18.0000
...
0
0
1
0
0
1
0.811765
0
0
2
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
...
1
0
0
0
0
1
0.811765
0
1
1
20
21
0
2
Fynney, Mr. Joseph J
male
35.0
0
0
239865
26.0000
...
0
0
1
0
1
0
0.155882
0
1
1
21
22
1
2
Beesley, Mr. Lawrence
male
34.0
0
0
248698
13.0000
...
0
0
1
0
1
0
0.155882
0
1
1
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.0
0
0
330923
8.0292
...
0
1
0
0
0
1
0.703704
1
1
1
23
24
1
1
Sloper, Mr. William Thompson
male
28.0
0
0
113788
35.5000
...
0
0
1
1
0
0
0.155882
0
1
1
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.0
3
1
349909
21.0750
...
0
0
1
0
0
1
0.703704
1
0
5
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.0
1
5
347077
31.3875
...
0
0
1
0
0
1
0.811765
0
0
7
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
...
1
0
0
0
0
1
0.155882
0
1
1
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.0
3
2
19950
263.0000
...
0
0
1
1
0
0
0.155882
0
0
6
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
...
0
1
0
0
0
1
0.703704
0
1
1
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
...
0
0
1
0
0
1
0.155882
0
1
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
570
571
1
2
Harris, Mr. George
male
62.0
0
0
S.W./PP 752
10.5000
...
0
0
1
0
1
0
0.155882
0
1
1
571
572
1
1
Appleton, Mrs. Edward Dale (Charlotte Lamson)
female
53.0
2
0
11769
51.4792
...
0
0
1
1
0
0
0.811765
0
0
3
572
573
1
1
Flynn, Mr. John Irwin ("Irving")
male
36.0
0
0
PC 17474
26.3875
...
0
0
1
1
0
0
0.155882
0
1
1
573
574
1
3
Kelly, Miss. Mary
female
NaN
0
0
14312
7.7500
...
0
1
0
0
0
1
0.703704
0
1
1
574
575
0
3
Rush, Mr. Alfred George John
male
16.0
0
0
A/4. 20589
8.0500
...
0
0
1
0
0
1
0.155882
1
1
1
575
576
0
3
Patchett, Mr. George
male
19.0
0
0
358585
14.5000
...
0
0
1
0
0
1
0.155882
0
1
1
576
577
1
2
Garside, Miss. Ethel
female
34.0
0
0
243880
13.0000
...
0
0
1
0
1
0
0.703704
0
1
1
577
578
1
1
Silvey, Mrs. William Baird (Alice Munger)
female
39.0
1
0
13507
55.9000
...
0
0
1
1
0
0
0.811765
0
0
2
578
579
0
3
Caram, Mrs. Joseph (Maria Elias)
female
NaN
1
0
2689
14.4583
...
1
0
0
0
0
1
0.811765
0
0
2
579
580
1
3
Jussila, Mr. Eiriik
male
32.0
0
0
STON/O 2. 3101286
7.9250
...
0
0
1
0
0
1
0.155882
0
1
1
580
581
1
2
Christy, Miss. Julie Rachel
female
25.0
1
1
237789
30.0000
...
0
0
1
0
1
0
0.703704
0
0
3
581
582
1
1
Thayer, Mrs. John Borland (Marian Longstreth M...
female
39.0
1
1
17421
110.8833
...
1
0
0
1
0
0
0.811765
0
0
3
582
583
0
2
Downton, Mr. William James
male
54.0
0
0
28403
26.0000
...
0
0
1
0
1
0
0.155882
0
1
1
583
584
0
1
Ross, Mr. John Hugo
male
36.0
0
0
13049
40.1250
...
1
0
0
1
0
0
0.155882
0
1
1
584
585
0
3
Paulner, Mr. Uscher
male
NaN
0
0
3411
8.7125
...
1
0
0
0
0
1
0.155882
0
1
1
585
586
1
1
Taussig, Miss. Ruth
female
18.0
0
2
110413
79.6500
...
0
0
1
1
0
0
0.703704
0
0
3
586
587
0
2
Jarvis, Mr. John Denzil
male
47.0
0
0
237565
15.0000
...
0
0
1
0
1
0
0.155882
0
1
1
587
588
1
1
Frolicher-Stehli, Mr. Maxmillian
male
60.0
1
1
13567
79.2000
...
1
0
0
1
0
0
0.155882
0
0
3
588
589
0
3
Gilinski, Mr. Eliezer
male
22.0
0
0
14973
8.0500
...
0
0
1
0
0
1
0.155882
0
1
1
589
590
0
3
Murdlin, Mr. Joseph
male
NaN
0
0
A./5. 3235
8.0500
...
0
0
1
0
0
1
0.155882
0
1
1
590
591
0
3
Rintamaki, Mr. Matti
male
35.0
0
0
STON/O 2. 3101273
7.1250
...
0
0
1
0
0
1
0.155882
0
1
1
591
592
1
1
Stephenson, Mrs. Walter Bertram (Martha Eustis)
female
52.0
1
0
36947
78.2667
...
1
0
0
1
0
0
0.811765
0
0
2
592
593
0
3
Elsbury, Mr. William James
male
47.0
0
0
A/5 3902
7.2500
...
0
0
1
0
0
1
0.155882
0
1
1
593
594
0
3
Bourke, Miss. Mary
female
NaN
0
2
364848
7.7500
...
0
1
0
0
0
1
0.703704
0
0
3
594
595
0
2
Chapman, Mr. John Henry
male
37.0
1
0
SC/AH 29037
26.0000
...
0
0
1
0
1
0
0.155882
0
0
2
595
596
0
3
Van Impe, Mr. Jean Baptiste
male
36.0
1
1
345773
24.1500
...
0
0
1
0
0
1
0.155882
0
0
3
596
597
1
2
Leitch, Miss. Jessie Wills
female
NaN
0
0
248727
33.0000
...
0
0
1
0
1
0
0.703704
0
1
1
597
598
0
3
Johnson, Mr. Alfred
male
49.0
0
0
LINE
0.0000
...
0
0
1
0
0
1
0.155882
0
1
1
598
599
0
3
Boulos, Mr. Hanna
male
NaN
0
0
2664
7.2250
...
1
0
0
0
0
1
0.155882
0
1
1
599
600
1
1
Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")
male
49.0
1
0
PC 17485
56.9292
...
1
0
0
1
0
0
1.000000
0
0
2
600 rows × 22 columns
In [99]:
from sklearn.ensemble import RandomForestRegressor
def set_missing_ages(df, features, target):
# 根据所坐舱位等数字讯息推断年龄
target_df = df[features]
known = target_df[target_df[target].notnull()].as_matrix()
unknown = target_df[target_df[target].isnull()].as_matrix()
y = known[:, 0]
X = known[:, 1:]
if len(unknown):
rfr = RandomForestRegressor(
random_state=0, n_estimators=2000, n_jobs=-1)
rfr.fit(X, y)
predicted = rfr.predict(unknown[:, 1::])
df.loc[(df[target].isnull()), target] = predicted
return df
source = set_missing_ages(source, ['Age', 'Fare', 'Parch', 'SibSp', 'Pclass'], 'Age')
source['Sex'] = source['Sex'].map(lambda x: 1 if x == 'male' else 0)
source2 = source.filter(regex='isChild|isAlone|Title|Age|SibSp|Parch|Fare|Embarked_.*|Sex|Pclass_.*')
source2
Out[99]:
Sex
Age
SibSp
Parch
Fare
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Title
isChild
isAlone
0
1
22.000000
1
0
7.2500
0
0
1
0
0
1
0.155882
0
0
1
0
38.000000
1
0
71.2833
1
0
0
1
0
0
0.811765
0
0
2
0
26.000000
0
0
7.9250
0
0
1
0
0
1
0.703704
0
1
3
0
35.000000
1
0
53.1000
0
0
1
1
0
0
0.811765
0
0
4
1
35.000000
0
0
8.0500
0
0
1
0
0
1
0.155882
0
1
5
1
31.459834
0
0
8.4583
0
1
0
0
0
1
0.155882
0
1
6
1
54.000000
0
0
51.8625
0
0
1
1
0
0
0.155882
0
1
7
1
2.000000
3
1
21.0750
0
0
1
0
0
1
0.518519
1
0
8
0
27.000000
0
2
11.1333
0
0
1
0
0
1
0.811765
0
0
9
0
14.000000
1
0
30.0708
1
0
0
0
1
0
0.811765
1
0
10
0
4.000000
1
1
16.7000
0
0
1
0
0
1
0.703704
1
0
11
0
58.000000
0
0
26.5500
0
0
1
1
0
0
0.703704
0
1
12
1
20.000000
0
0
8.0500
0
0
1
0
0
1
0.155882
0
1
13
1
39.000000
1
5
31.2750
0
0
1
0
0
1
0.155882
0
0
14
0
14.000000
0
0
7.8542
0
0
1
0
0
1
0.703704
1
1
15
0
55.000000
0
0
16.0000
0
0
1
0
1
0
0.811765
0
1
16
1
2.000000
4
1
29.1250
0
1
0
0
0
1
0.518519
1
0
17
1
31.621829
0
0
13.0000
0
0
1
0
1
0
0.155882
0
1
18
0
31.000000
1
0
18.0000
0
0
1
0
0
1
0.811765
0
0
19
0
32.150027
0
0
7.2250
1
0
0
0
0
1
0.811765
0
1
20
1
35.000000
0
0
26.0000
0
0
1
0
1
0
0.155882
0
1
21
1
34.000000
0
0
13.0000
0
0
1
0
1
0
0.155882
0
1
22
0
15.000000
0
0
8.0292
0
1
0
0
0
1
0.703704
1
1
23
1
28.000000
0
0
35.5000
0
0
1
1
0
0
0.155882
0
1
24
0
8.000000
3
1
21.0750
0
0
1
0
0
1
0.703704
1
0
25
0
38.000000
1
5
31.3875
0
0
1
0
0
1
0.811765
0
0
26
1
32.150027
0
0
7.2250
1
0
0
0
0
1
0.155882
0
1
27
1
19.000000
3
2
263.0000
0
0
1
1
0
0
0.155882
0
0
28
0
22.625180
0
0
7.8792
0
1
0
0
0
1
0.703704
0
1
29
1
29.032518
0
0
7.8958
0
0
1
0
0
1
0.155882
0
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
570
1
62.000000
0
0
10.5000
0
0
1
0
1
0
0.155882
0
1
571
0
53.000000
2
0
51.4792
0
0
1
1
0
0
0.811765
0
0
572
1
36.000000
0
0
26.3875
0
0
1
1
0
0
0.155882
0
1
573
0
38.173957
0
0
7.7500
0
1
0
0
0
1
0.703704
0
1
574
1
16.000000
0
0
8.0500
0
0
1
0
0
1
0.155882
1
1
575
1
19.000000
0
0
14.5000
0
0
1
0
0
1
0.155882
0
1
576
0
34.000000
0
0
13.0000
0
0
1
0
1
0
0.703704
0
1
577
0
39.000000
1
0
55.9000
0
0
1
1
0
0
0.811765
0
0
578
0
20.313637
1
0
14.4583
1
0
0
0
0
1
0.811765
0
0
579
1
32.000000
0
0
7.9250
0
0
1
0
0
1
0.155882
0
1
580
0
25.000000
1
1
30.0000
0
0
1
0
1
0
0.703704
0
0
581
0
39.000000
1
1
110.8833
1
0
0
1
0
0
0.811765
0
0
582
1
54.000000
0
0
26.0000
0
0
1
0
1
0
0.155882
0
1
583
1
36.000000
0
0
40.1250
1
0
0
1
0
0
0.155882
0
1
584
1
28.018904
0
0
8.7125
1
0
0
0
0
1
0.155882
0
1
585
0
18.000000
0
2
79.6500
0
0
1
1
0
0
0.703704
0
0
586
1
47.000000
0
0
15.0000
0
0
1
0
1
0
0.155882
0
1
587
1
60.000000
1
1
79.2000
1
0
0
1
0
0
0.155882
0
0
588
1
22.000000
0
0
8.0500
0
0
1
0
0
1
0.155882
0
1
589
1
28.839197
0
0
8.0500
0
0
1
0
0
1
0.155882
0
1
590
1
35.000000
0
0
7.1250
0
0
1
0
0
1
0.155882
0
1
591
0
52.000000
1
0
78.2667
1
0
0
1
0
0
0.811765
0
0
592
1
47.000000
0
0
7.2500
0
0
1
0
0
1
0.155882
0
1
593
0
27.694073
0
2
7.7500
0
1
0
0
0
1
0.703704
0
0
594
1
37.000000
1
0
26.0000
0
0
1
0
1
0
0.155882
0
0
595
1
36.000000
1
1
24.1500
0
0
1
0
0
1
0.155882
0
0
596
0
26.601009
0
0
33.0000
0
0
1
0
1
0
0.703704
0
1
597
1
49.000000
0
0
0.0000
0
0
1
0
0
1
0.155882
0
1
598
1
32.150027
0
0
7.2250
1
0
0
0
0
1
0.155882
0
1
599
1
49.000000
1
0
56.9292
1
0
0
1
0
0
1.000000
0
0
600 rows × 14 columns
In [101]:
from sklearn import preprocessing
preprocessing.MinMaxScaler().fit_transform(source2)
Out[101]:
array([[ 1. , 0.3024911 , 0.125 , ..., 0.15588235,
0. , 0. ],
[ 0. , 0.53024911, 0.125 , ..., 0.81176471,
0. , 0. ],
[ 0. , 0.3594306 , 0. , ..., 0.7037037 ,
0. , 1. ],
...,
[ 1. , 0.68683274, 0. , ..., 0.15588235,
0. , 1. ],
[ 1. , 0.44697547, 0. , ..., 0.15588235,
0. , 1. ],
[ 1. , 0.68683274, 0.125 , ..., 1. ,
0. , 0. ]])
In [ ]:
Content source: Jsonzhang/kaggle
Similar notebooks: