In [5]:
# Load the basic libraries...
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import sklearn
%pylab inline
Populating the interactive namespace from numpy and matplotlib
**VARIABLE DESCRIPTIONS**

(In the loaded CSV the column names are capitalised, e.g. `Survived`, `Pclass`, `SibSp`.)

- `survival` – Survival (0 = No; 1 = Yes)
- `pclass` – Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)
- `name` – Name
- `sex` – Sex
- `age` – Age
- `sibsp` – Number of Siblings/Spouses Aboard
- `parch` – Number of Parents/Children Aboard
- `ticket` – Ticket Number
- `fare` – Passenger Fare
- `cabin` – Cabin
- `embarked` – Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)

**SPECIAL NOTES**

- Pclass is a proxy for socio-economic status (SES): 1st ~ Upper; 2nd ~ Middle; 3rd ~ Lower.
- Age is in years; it is fractional if the age is less than one (1). If the age is estimated, it is in the form xx.5.
- With respect to the family relation variables (i.e. sibsp and parch), some relations were ignored. The following are the definitions used for sibsp and parch:
  - Sibling: Brother, Sister, Stepbrother, or Stepsister of Passenger Aboard Titanic
  - Spouse: Husband or Wife of Passenger Aboard Titanic (Mistresses and Fiances Ignored)
  - Parent: Mother or Father of Passenger Aboard Titanic
  - Child: Son, Daughter, Stepson, or Stepdaughter of Passenger Aboard Titanic
- Other family relatives excluded from this study include cousins, nephews/nieces, aunts/uncles, and in-laws.
- Some children travelled only with a nanny, therefore parch=0 for them. As well, some travelled with very close friends or neighbors in a village; however, the definitions do not support such relations.
In [6]:
# Location of the Titanic training set (kept exactly as the notebook author had it).
TITANIC_CSV = "/home/jan/su-jupyter/data/titanic-train.csv"

# Read the passenger records into a DataFrame and display it.
titanic_df = pd.read_csv(TITANIC_CSV)
titanic_df
Out[6]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
1
0
3
Braund, Mr. Owen Harris
male
22
1
0
A/5 21171
7.2500
NaN
S
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38
1
0
PC 17599
71.2833
C85
C
2
3
1
3
Heikkinen, Miss. Laina
female
26
0
0
STON/O2. 3101282
7.9250
NaN
S
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35
1
0
113803
53.1000
C123
S
4
5
0
3
Allen, Mr. William Henry
male
35
0
0
373450
8.0500
NaN
S
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
6
7
0
1
McCarthy, Mr. Timothy J
male
54
0
0
17463
51.8625
E46
S
7
8
0
3
Palsson, Master. Gosta Leonard
male
2
3
1
349909
21.0750
NaN
S
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27
0
2
347742
11.1333
NaN
S
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14
1
0
237736
30.0708
NaN
C
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4
1
1
PP 9549
16.7000
G6
S
11
12
1
1
Bonnell, Miss. Elizabeth
female
58
0
0
113783
26.5500
C103
S
12
13
0
3
Saundercock, Mr. William Henry
male
20
0
0
A/5. 2151
8.0500
NaN
S
13
14
0
3
Andersson, Mr. Anders Johan
male
39
1
5
347082
31.2750
NaN
S
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14
0
0
350406
7.8542
NaN
S
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55
0
0
248706
16.0000
NaN
S
16
17
0
3
Rice, Master. Eugene
male
2
4
1
382652
29.1250
NaN
Q
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31
1
0
345763
18.0000
NaN
S
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
20
21
0
2
Fynney, Mr. Joseph J
male
35
0
0
239865
26.0000
NaN
S
21
22
1
2
Beesley, Mr. Lawrence
male
34
0
0
248698
13.0000
D56
S
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15
0
0
330923
8.0292
NaN
Q
23
24
1
1
Sloper, Mr. William Thompson
male
28
0
0
113788
35.5000
A6
S
24
25
0
3
Palsson, Miss. Torborg Danira
female
8
3
1
349909
21.0750
NaN
S
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38
1
5
347077
31.3875
NaN
S
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
27
28
0
1
Fortune, Mr. Charles Alexander
male
19
3
2
19950
263.0000
C23 C25 C27
S
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
...
...
...
...
...
...
...
...
...
...
...
...
...
861
862
0
2
Giles, Mr. Frederick Edward
male
21
1
0
28134
11.5000
NaN
S
862
863
1
1
Swift, Mrs. Frederick Joel (Margaret Welles Ba...
female
48
0
0
17466
25.9292
D17
S
863
864
0
3
Sage, Miss. Dorothy Edith "Dolly"
female
NaN
8
2
CA. 2343
69.5500
NaN
S
864
865
0
2
Gill, Mr. John William
male
24
0
0
233866
13.0000
NaN
S
865
866
1
2
Bystrom, Mrs. (Karolina)
female
42
0
0
236852
13.0000
NaN
S
866
867
1
2
Duran y More, Miss. Asuncion
female
27
1
0
SC/PARIS 2149
13.8583
NaN
C
867
868
0
1
Roebling, Mr. Washington Augustus II
male
31
0
0
PC 17590
50.4958
A24
S
868
869
0
3
van Melkebeke, Mr. Philemon
male
NaN
0
0
345777
9.5000
NaN
S
869
870
1
3
Johnson, Master. Harold Theodor
male
4
1
1
347742
11.1333
NaN
S
870
871
0
3
Balkic, Mr. Cerin
male
26
0
0
349248
7.8958
NaN
S
871
872
1
1
Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
female
47
1
1
11751
52.5542
D35
S
872
873
0
1
Carlsson, Mr. Frans Olof
male
33
0
0
695
5.0000
B51 B53 B55
S
873
874
0
3
Vander Cruyssen, Mr. Victor
male
47
0
0
345765
9.0000
NaN
S
874
875
1
2
Abelson, Mrs. Samuel (Hannah Wizosky)
female
28
1
0
P/PP 3381
24.0000
NaN
C
875
876
1
3
Najib, Miss. Adele Kiamie "Jane"
female
15
0
0
2667
7.2250
NaN
C
876
877
0
3
Gustafsson, Mr. Alfred Ossian
male
20
0
0
7534
9.8458
NaN
S
877
878
0
3
Petroff, Mr. Nedelio
male
19
0
0
349212
7.8958
NaN
S
878
879
0
3
Laleff, Mr. Kristo
male
NaN
0
0
349217
7.8958
NaN
S
879
880
1
1
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
female
56
0
1
11767
83.1583
C50
C
880
881
1
2
Shelley, Mrs. William (Imanita Parrish Hall)
female
25
0
1
230433
26.0000
NaN
S
881
882
0
3
Markun, Mr. Johann
male
33
0
0
349257
7.8958
NaN
S
882
883
0
3
Dahlberg, Miss. Gerda Ulrika
female
22
0
0
7552
10.5167
NaN
S
883
884
0
2
Banfield, Mr. Frederick James
male
28
0
0
C.A./SOTON 34068
10.5000
NaN
S
884
885
0
3
Sutehall, Mr. Henry Jr
male
25
0
0
SOTON/OQ 392076
7.0500
NaN
S
885
886
0
3
Rice, Mrs. William (Margaret Norton)
female
39
0
5
382652
29.1250
NaN
Q
886
887
0
2
Montvila, Rev. Juozas
male
27
0
0
211536
13.0000
NaN
S
887
888
1
1
Graham, Miss. Margaret Edith
female
19
0
0
112053
30.0000
B42
S
888
889
0
3
Johnston, Miss. Catherine Helen "Carrie"
female
NaN
1
2
W./C. 6607
23.4500
NaN
S
889
890
1
1
Behr, Mr. Karl Howell
male
26
0
0
111369
30.0000
C148
C
890
891
0
3
Dooley, Mr. Patrick
male
32
0
0
370376
7.7500
NaN
Q
891 rows × 12 columns
In [3]:
# PassengerId is only a row identifier, so remove it before modelling.
# The membership check makes the cell safe to re-run: dropping a column that
# is already gone would otherwise raise.
if 'PassengerId' in titanic_df.columns:
    titanic_df = titanic_df.drop(['PassengerId'], axis=1)
In [4]:
# Summary statistics of the numeric columns. Age reports a count of 714 while
# the other columns report 891, i.e. Age contains missing values.
titanic_df.describe()
Out[4]:
Survived
Pclass
Age
SibSp
Parch
Fare
count
891.000000
891.000000
714.000000
891.000000
891.000000
891.000000
mean
0.383838
2.308642
29.699118
0.523008
0.381594
32.204208
std
0.486592
0.836071
14.526497
1.102743
0.806057
49.693429
min
0.000000
1.000000
0.420000
0.000000
0.000000
0.000000
25%
0.000000
2.000000
20.125000
0.000000
0.000000
7.910400
50%
0.000000
3.000000
28.000000
0.000000
0.000000
14.454200
75%
1.000000
3.000000
38.000000
1.000000
0.000000
31.000000
max
1.000000
3.000000
80.000000
8.000000
6.000000
512.329200
In [8]:
# Keep only the three predictors used below together with the target column.
selected_columns = ['Pclass', 'Sex', 'Age', 'Survived']
titanic_df1 = titanic_df[selected_columns]
titanic_df1
Out[8]:
Pclass
Sex
Age
Survived
0
3
male
22
0
1
1
female
38
1
2
3
female
26
1
3
1
female
35
1
4
3
male
35
0
5
3
male
NaN
0
6
1
male
54
0
7
3
male
2
0
8
3
female
27
1
9
2
female
14
1
10
3
female
4
1
11
1
female
58
1
12
3
male
20
0
13
3
male
39
0
14
3
female
14
0
15
2
female
55
1
16
3
male
2
0
17
2
male
NaN
1
18
3
female
31
0
19
3
female
NaN
1
20
2
male
35
0
21
2
male
34
1
22
3
female
15
1
23
1
male
28
1
24
3
female
8
0
25
3
female
38
1
26
3
male
NaN
0
27
1
male
19
0
28
3
female
NaN
1
29
3
male
NaN
0
...
...
...
...
...
861
2
male
21
0
862
1
female
48
1
863
3
female
NaN
0
864
2
male
24
0
865
2
female
42
1
866
2
female
27
1
867
1
male
31
0
868
3
male
NaN
0
869
3
male
4
1
870
3
male
26
0
871
1
female
47
1
872
1
male
33
0
873
3
male
47
0
874
2
female
28
1
875
3
female
15
1
876
3
male
20
0
877
3
male
19
0
878
3
male
NaN
0
879
1
female
56
1
880
2
female
25
1
881
3
male
33
0
882
3
female
22
0
883
2
male
28
0
884
3
male
25
0
885
3
female
39
0
886
2
male
27
0
887
1
female
19
1
888
3
female
NaN
0
889
1
male
26
1
890
3
male
32
0
891 rows × 4 columns
In [9]:
# Split the passengers by outcome: Survived == 1 versus Survived == 0.
outcome = titanic_df1['Survived']
survivors = titanic_df1[outcome == 1]
victims = titanic_df1[outcome == 0]
In [11]:
# Age on the x-axis against passenger class on the y-axis; the marker colour
# encodes the Survived label. The trailing ';' hides the returned artist repr.
plt.scatter(
    x=titanic_df1['Age'],
    y=titanic_df1['Pclass'],
    c=titanic_df1['Survived'],
    cmap='prism',
    marker='o',
    s=100,
    alpha=0.5,
);
In [12]:
# Feature matrix (Pclass, Sex, Age) and target vector (Survived) as NumPy arrays.
# `.values` is used instead of `as_matrix()`: the latter was deprecated and
# later removed from pandas, while `.values` yields the same array on old and
# new releases. Because Sex is still a string column, titanic_X has dtype=object.
titanic_X = titanic_df[['Pclass', 'Sex', 'Age']].values
titanic_y = titanic_df['Survived'].values
In [13]:
# Sanity check: one feature row per label.
titanic_X.shape, titanic_y.shape
Out[13]:
((891, 3), (891,))
In [14]:
# Inspect the feature matrix; Sex is still 'male'/'female' at this point,
# which is why the array is shown with dtype=object.
titanic_X
Out[14]:
array([[3, 'male', 22.0],
[1, 'female', 38.0],
[3, 'female', 26.0],
...,
[3, 'female', nan],
[1, 'male', 26.0],
[3, 'male', 32.0]], dtype=object)
In [15]:
# Inspect the target vector of 0/1 survival labels.
titanic_y
Out[15]:
array([0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1,
1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0,
0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,
1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1,
0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0])
In [16]:
# Encoder that maps the string values of the Sex column to integer codes.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
In [17]:
# Replace the Sex strings in column 1 with integer codes
# (the printed result shows 'female' -> 0 and 'male' -> 1).
titanic_X[:, 1] = le.fit_transform(titanic_X[:, 1])
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(titanic_X)
[[3 1 22.0]
[1 0 38.0]
[3 0 26.0]
...,
[3 0 nan]
[1 1 26.0]
[3 1 32.0]]
In [18]:
# Fill the missing Age values with the column mean so the tree can be trained
# on every row. The result is a plain float array.
try:
    # Current scikit-learn: Imputer was replaced by SimpleImputer, which
    # always works column-wise and treats NaN as missing by default.
    from sklearn.impute import SimpleImputer
    imp = SimpleImputer(strategy='mean')
except ImportError:
    # Older scikit-learn releases only ship the original Imputer class.
    from sklearn.preprocessing import Imputer
    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
titanic_X = imp.fit_transform(titanic_X)
# print() with a single argument is valid on both Python 2 and Python 3.
print(titanic_X)
[[ 3. 1. 22. ]
[ 1. 0. 38. ]
[ 3. 0. 26. ]
...,
[ 3. 0. 29.69911765]
[ 1. 1. 26. ]
[ 3. 1. 32. ]]
In [20]:
from sklearn import tree
# Train a shallow (depth-3) decision tree on the whole data set. fit() returns
# the estimator itself, so construction and training are chained.
clf = tree.DecisionTreeClassifier(max_depth=3).fit(titanic_X, titanic_y)
In [21]:
# Predict the label of every passenger. Note that titanic_X is the same data
# the tree was fitted on, so these are in-sample predictions.
titanic_y_predicted = clf.predict(titanic_X)
In [22]:
# Put the predicted label at column position 1 so it can be compared with
# the true label by eye.
# DataFrame.insert raises when the column already exists, so remove any copy
# left by an earlier run first; this keeps the cell safe to re-execute.
if 'Survior pred' in titanic_df.columns:
    del titanic_df['Survior pred']
titanic_df.insert(1, 'Survior pred', titanic_y_predicted)
titanic_df
Out[22]:
PassengerId
Survior pred
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
1
0
0
3
Braund, Mr. Owen Harris
male
22
1
0
A/5 21171
7.2500
NaN
S
1
2
1
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38
1
0
PC 17599
71.2833
C85
C
2
3
1
1
3
Heikkinen, Miss. Laina
female
26
0
0
STON/O2. 3101282
7.9250
NaN
S
3
4
1
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35
1
0
113803
53.1000
C123
S
4
5
0
0
3
Allen, Mr. William Henry
male
35
0
0
373450
8.0500
NaN
S
5
6
0
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
6
7
0
0
1
McCarthy, Mr. Timothy J
male
54
0
0
17463
51.8625
E46
S
7
8
0
0
3
Palsson, Master. Gosta Leonard
male
2
3
1
349909
21.0750
NaN
S
8
9
1
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27
0
2
347742
11.1333
NaN
S
9
10
1
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14
1
0
237736
30.0708
NaN
C
10
11
1
1
3
Sandstrom, Miss. Marguerite Rut
female
4
1
1
PP 9549
16.7000
G6
S
11
12
1
1
1
Bonnell, Miss. Elizabeth
female
58
0
0
113783
26.5500
C103
S
12
13
0
0
3
Saundercock, Mr. William Henry
male
20
0
0
A/5. 2151
8.0500
NaN
S
13
14
0
0
3
Andersson, Mr. Anders Johan
male
39
1
5
347082
31.2750
NaN
S
14
15
1
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14
0
0
350406
7.8542
NaN
S
15
16
1
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55
0
0
248706
16.0000
NaN
S
16
17
0
0
3
Rice, Master. Eugene
male
2
4
1
382652
29.1250
NaN
Q
17
18
0
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
18
19
1
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31
1
0
345763
18.0000
NaN
S
19
20
1
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
20
21
0
0
2
Fynney, Mr. Joseph J
male
35
0
0
239865
26.0000
NaN
S
21
22
0
1
2
Beesley, Mr. Lawrence
male
34
0
0
248698
13.0000
D56
S
22
23
1
1
3
McGowan, Miss. Anna "Annie"
female
15
0
0
330923
8.0292
NaN
Q
23
24
0
1
1
Sloper, Mr. William Thompson
male
28
0
0
113788
35.5000
A6
S
24
25
1
0
3
Palsson, Miss. Torborg Danira
female
8
3
1
349909
21.0750
NaN
S
25
26
1
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38
1
5
347077
31.3875
NaN
S
26
27
0
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
27
28
0
0
1
Fortune, Mr. Charles Alexander
male
19
3
2
19950
263.0000
C23 C25 C27
S
28
29
1
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
29
30
0
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
...
...
...
...
...
...
...
...
...
...
...
...
...
...
861
862
0
0
2
Giles, Mr. Frederick Edward
male
21
1
0
28134
11.5000
NaN
S
862
863
1
1
1
Swift, Mrs. Frederick Joel (Margaret Welles Ba...
female
48
0
0
17466
25.9292
D17
S
863
864
1
0
3
Sage, Miss. Dorothy Edith "Dolly"
female
NaN
8
2
CA. 2343
69.5500
NaN
S
864
865
0
0
2
Gill, Mr. John William
male
24
0
0
233866
13.0000
NaN
S
865
866
1
1
2
Bystrom, Mrs. (Karolina)
female
42
0
0
236852
13.0000
NaN
S
866
867
1
1
2
Duran y More, Miss. Asuncion
female
27
1
0
SC/PARIS 2149
13.8583
NaN
C
867
868
0
0
1
Roebling, Mr. Washington Augustus II
male
31
0
0
PC 17590
50.4958
A24
S
868
869
0
0
3
van Melkebeke, Mr. Philemon
male
NaN
0
0
345777
9.5000
NaN
S
869
870
0
1
3
Johnson, Master. Harold Theodor
male
4
1
1
347742
11.1333
NaN
S
870
871
0
0
3
Balkic, Mr. Cerin
male
26
0
0
349248
7.8958
NaN
S
871
872
1
1
1
Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
female
47
1
1
11751
52.5542
D35
S
872
873
0
0
1
Carlsson, Mr. Frans Olof
male
33
0
0
695
5.0000
B51 B53 B55
S
873
874
0
0
3
Vander Cruyssen, Mr. Victor
male
47
0
0
345765
9.0000
NaN
S
874
875
1
1
2
Abelson, Mrs. Samuel (Hannah Wizosky)
female
28
1
0
P/PP 3381
24.0000
NaN
C
875
876
1
1
3
Najib, Miss. Adele Kiamie "Jane"
female
15
0
0
2667
7.2250
NaN
C
876
877
0
0
3
Gustafsson, Mr. Alfred Ossian
male
20
0
0
7534
9.8458
NaN
S
877
878
0
0
3
Petroff, Mr. Nedelio
male
19
0
0
349212
7.8958
NaN
S
878
879
0
0
3
Laleff, Mr. Kristo
male
NaN
0
0
349217
7.8958
NaN
S
879
880
1
1
1
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
female
56
0
1
11767
83.1583
C50
C
880
881
1
1
2
Shelley, Mrs. William (Imanita Parrish Hall)
female
25
0
1
230433
26.0000
NaN
S
881
882
0
0
3
Markun, Mr. Johann
male
33
0
0
349257
7.8958
NaN
S
882
883
1
0
3
Dahlberg, Miss. Gerda Ulrika
female
22
0
0
7552
10.5167
NaN
S
883
884
0
0
2
Banfield, Mr. Frederick James
male
28
0
0
C.A./SOTON 34068
10.5000
NaN
S
884
885
0
0
3
Sutehall, Mr. Henry Jr
male
25
0
0
SOTON/OQ 392076
7.0500
NaN
S
885
886
0
0
3
Rice, Mrs. William (Margaret Norton)
female
39
0
5
382652
29.1250
NaN
Q
886
887
0
0
2
Montvila, Rev. Juozas
male
27
0
0
211536
13.0000
NaN
S
887
888
1
1
1
Graham, Miss. Margaret Edith
female
19
0
0
112053
30.0000
B42
S
888
889
1
0
3
Johnston, Miss. Catherine Helen "Carrie"
female
NaN
1
2
W./C. 6607
23.4500
NaN
S
889
890
0
1
1
Behr, Mr. Karl Howell
male
26
0
0
111369
30.0000
C148
C
890
891
0
0
3
Dooley, Mr. Patrick
male
32
0
0
370376
7.7500
NaN
Q
891 rows × 13 columns
In [19]:
from sklearn.metrics import accuracy_score
# Fraction of passengers whose predicted label equals the true one. The
# predictions were made on the data the tree was fitted on, so this is the
# training accuracy.
accuracy_score(y_true=titanic_y, y_pred=titanic_y_predicted)
Out[19]:
0.80920314253647585
In [20]:
# Render the fitted decision tree as a PNG image via Graphviz.
# NOTE(review): sklearn.externals.six is not available in recent scikit-learn
# releases, and newer pydot versions appear to return a list from
# graph_from_dot_data -- confirm the library versions this notebook targets.
from sklearn.externals.six import StringIO
import pyparsing  # not referenced by the code in this cell
import pydot
from IPython.display import Image
# export_graphviz writes the DOT description of the tree into this in-memory buffer.
dot_data = StringIO()
tree.export_graphviz(clf, out_file=dot_data, feature_names=['Pclass', 'Sex', 'Age'])
# Parse the DOT text and let Graphviz produce the PNG bytes for display.
graph = pydot.graph_from_dot_data(dot_data.getvalue())
img = Image(graph.create_png())
In [50]:
# Fix the display width, then show the tree image as the cell's output.
img.width = 800
img
Out[50]:
In [51]:
# Class probabilities for every passenger, one column per class. Column 1 is
# the one stored later as the survival probability.
titanic_y_predicted_proba = clf.predict_proba(titanic_X)
In [52]:
# Show the predicted survival probability (column 1 of predict_proba) at
# column position 2 of the passenger table.
# DataFrame.insert raises when the column already exists, so remove any copy
# left by an earlier run first; this keeps the cell safe to re-execute.
if 'Survior prob' in titanic_df.columns:
    del titanic_df['Survior prob']
titanic_df.insert(2, 'Survior prob', titanic_y_predicted_proba[:, 1])
titanic_df
Out[52]:
Survived
Survior pred
Survior prob
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
0
0
0.115473
3
Braund, Mr. Owen Harris
male
22
1
0
A/5 21171
7.2500
NaN
S
1
1
1
0.952381
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38
1
0
PC 17599
71.2833
C85
C
2
1
1
0.537879
3
Heikkinen, Miss. Laina
female
26
0
0
STON/O2. 3101282
7.9250
NaN
S
3
1
1
0.952381
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35
1
0
113803
53.1000
C123
S
4
0
0
0.115473
3
Allen, Mr. William Henry
male
35
0
0
373450
8.0500
NaN
S
5
0
0
0.115473
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
6
0
0
0.358333
1
McCarthy, Mr. Timothy J
male
54
0
0
17463
51.8625
E46
S
7
0
0
0.428571
3
Palsson, Master. Gosta Leonard
male
2
3
1
349909
21.0750
NaN
S
8
1
1
0.537879
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27
0
2
347742
11.1333
NaN
S
9
1
1
0.952381
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14
1
0
237736
30.0708
NaN
C
10
1
1
0.537879
3
Sandstrom, Miss. Marguerite Rut
female
4
1
1
PP 9549
16.7000
G6
S
11
1
1
0.952381
1
Bonnell, Miss. Elizabeth
female
58
0
0
113783
26.5500
C103
S
12
0
0
0.115473
3
Saundercock, Mr. William Henry
male
20
0
0
A/5. 2151
8.0500
NaN
S
13
0
0
0.115473
3
Andersson, Mr. Anders Johan
male
39
1
5
347082
31.2750
NaN
S
14
0
1
0.537879
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14
0
0
350406
7.8542
NaN
S
15
1
1
0.952381
2
Hewlett, Mrs. (Mary D Kingcome)
female
55
0
0
248706
16.0000
NaN
S
16
0
0
0.428571
3
Rice, Master. Eugene
male
2
4
1
382652
29.1250
NaN
Q
17
1
0
0.115473
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
18
0
1
0.537879
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31
1
0
345763
18.0000
NaN
S
19
1
1
0.537879
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
20
0
0
0.115473
2
Fynney, Mr. Joseph J
male
35
0
0
239865
26.0000
NaN
S
21
1
0
0.115473
2
Beesley, Mr. Lawrence
male
34
0
0
248698
13.0000
D56
S
22
1
1
0.537879
3
McGowan, Miss. Anna "Annie"
female
15
0
0
330923
8.0292
NaN
Q
23
1
0
0.358333
1
Sloper, Mr. William Thompson
male
28
0
0
113788
35.5000
A6
S
24
0
1
0.537879
3
Palsson, Miss. Torborg Danira
female
8
3
1
349909
21.0750
NaN
S
25
1
1
0.537879
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38
1
5
347077
31.3875
NaN
S
26
0
0
0.115473
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
27
0
0
0.358333
1
Fortune, Mr. Charles Alexander
male
19
3
2
19950
263.0000
C23 C25 C27
S
28
1
1
0.537879
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
29
0
0
0.115473
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
...
...
...
...
...
...
...
...
...
...
...
...
...
...
861
0
0
0.115473
2
Giles, Mr. Frederick Edward
male
21
1
0
28134
11.5000
NaN
S
862
1
1
0.952381
1
Swift, Mrs. Frederick Joel (Margaret Welles Ba...
female
48
0
0
17466
25.9292
D17
S
863
0
1
0.537879
3
Sage, Miss. Dorothy Edith "Dolly"
female
NaN
8
2
CA. 2343
69.5500
NaN
S
864
0
0
0.115473
2
Gill, Mr. John William
male
24
0
0
233866
13.0000
NaN
S
865
1
1
0.952381
2
Bystrom, Mrs. (Karolina)
female
42
0
0
236852
13.0000
NaN
S
866
1
1
0.952381
2
Duran y More, Miss. Asuncion
female
27
1
0
SC/PARIS 2149
13.8583
NaN
C
867
0
0
0.358333
1
Roebling, Mr. Washington Augustus II
male
31
0
0
PC 17590
50.4958
A24
S
868
0
0
0.115473
3
van Melkebeke, Mr. Philemon
male
NaN
0
0
345777
9.5000
NaN
S
869
1
0
0.428571
3
Johnson, Master. Harold Theodor
male
4
1
1
347742
11.1333
NaN
S
870
0
0
0.115473
3
Balkic, Mr. Cerin
male
26
0
0
349248
7.8958
NaN
S
871
1
1
0.952381
1
Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
female
47
1
1
11751
52.5542
D35
S
872
0
0
0.358333
1
Carlsson, Mr. Frans Olof
male
33
0
0
695
5.0000
B51 B53 B55
S
873
0
0
0.115473
3
Vander Cruyssen, Mr. Victor
male
47
0
0
345765
9.0000
NaN
S
874
1
1
0.952381
2
Abelson, Mrs. Samuel (Hannah Wizosky)
female
28
1
0
P/PP 3381
24.0000
NaN
C
875
1
1
0.537879
3
Najib, Miss. Adele Kiamie "Jane"
female
15
0
0
2667
7.2250
NaN
C
876
0
0
0.115473
3
Gustafsson, Mr. Alfred Ossian
male
20
0
0
7534
9.8458
NaN
S
877
0
0
0.115473
3
Petroff, Mr. Nedelio
male
19
0
0
349212
7.8958
NaN
S
878
0
0
0.115473
3
Laleff, Mr. Kristo
male
NaN
0
0
349217
7.8958
NaN
S
879
1
1
0.952381
1
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
female
56
0
1
11767
83.1583
C50
C
880
1
1
0.952381
2
Shelley, Mrs. William (Imanita Parrish Hall)
female
25
0
1
230433
26.0000
NaN
S
881
0
0
0.115473
3
Markun, Mr. Johann
male
33
0
0
349257
7.8958
NaN
S
882
0
1
0.537879
3
Dahlberg, Miss. Gerda Ulrika
female
22
0
0
7552
10.5167
NaN
S
883
0
0
0.115473
2
Banfield, Mr. Frederick James
male
28
0
0
C.A./SOTON 34068
10.5000
NaN
S
884
0
0
0.115473
3
Sutehall, Mr. Henry Jr
male
25
0
0
SOTON/OQ 392076
7.0500
NaN
S
885
0
0
0.083333
3
Rice, Mrs. William (Margaret Norton)
female
39
0
5
382652
29.1250
NaN
Q
886
0
0
0.115473
2
Montvila, Rev. Juozas
male
27
0
0
211536
13.0000
NaN
S
887
1
1
0.952381
1
Graham, Miss. Margaret Edith
female
19
0
0
112053
30.0000
B42
S
888
0
1
0.537879
3
Johnston, Miss. Catherine Helen "Carrie"
female
NaN
1
2
W./C. 6607
23.4500
NaN
S
889
1
0
0.358333
1
Behr, Mr. Karl Howell
male
26
0
0
111369
30.0000
C148
C
890
0
0
0.115473
3
Dooley, Mr. Patrick
male
32
0
0
370376
7.7500
NaN
Q
891 rows × 13 columns
In [35]:
# Hand-made passengers to query the tree with.
# Feature order: Pclass, Sex, Age (Sex uses the encoded values: 0 = female, 1 = male).
# np.array is used instead of sp.array: the NumPy aliases in SciPy's top-level
# namespace are deprecated and missing from newer SciPy releases.
x_male_student = np.array([3, 1, 21])
x_rich_countess = np.array([1, 0, 65])
x_midleclass_mother = np.array([2, 0, 40])
x_baby = np.array([1, 0, 1])
In [32]:
# Pass the single passenger as one row: scikit-learn estimators expect a 2-D
# (n_samples, n_features) array, and newer releases reject a bare 1-D vector.
clf.predict_proba(x_male_student.reshape(1, -1))
Out[32]:
array([[ 0.88452656, 0.11547344]])
In [33]:
# Pass the single passenger as one row: scikit-learn estimators expect a 2-D
# (n_samples, n_features) array, and newer releases reject a bare 1-D vector.
clf.predict_proba(x_rich_countess.reshape(1, -1))
Out[33]:
array([[ 0.04761905, 0.95238095]])
In [36]:
# Pass the single passenger as one row: scikit-learn estimators expect a 2-D
# (n_samples, n_features) array, and newer releases reject a bare 1-D vector.
clf.predict_proba(x_midleclass_mother.reshape(1, -1))
Out[36]:
array([[ 0.04761905, 0.95238095]])
In [37]:
# Pass the single passenger as one row: scikit-learn estimators expect a 2-D
# (n_samples, n_features) array, and newer releases reject a bare 1-D vector.
clf.predict_proba(x_baby.reshape(1, -1))
Out[37]:
array([[ 0.5, 0.5]])
Content source: jsnajder/MachineLearningTutorial
Similar notebooks: