In [55]:
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
data_train = pd.read_csv("train.csv")
In [56]:
data_train
Out[56]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
1
0
3
Braund, Mr. Owen Harris
male
22.0
1
0
A/5 21171
7.2500
NaN
S
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.0
1
0
PC 17599
71.2833
C85
C
2
3
1
3
Heikkinen, Miss. Laina
female
26.0
0
0
STON/O2. 3101282
7.9250
NaN
S
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.0
1
0
113803
53.1000
C123
S
4
5
0
3
Allen, Mr. William Henry
male
35.0
0
0
373450
8.0500
NaN
S
5
6
0
3
Moran, Mr. James
male
NaN
0
0
330877
8.4583
NaN
Q
6
7
0
1
McCarthy, Mr. Timothy J
male
54.0
0
0
17463
51.8625
E46
S
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.0
3
1
349909
21.0750
NaN
S
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.0
0
2
347742
11.1333
NaN
S
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.0
1
0
237736
30.0708
NaN
C
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.0
1
1
PP 9549
16.7000
G6
S
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.0
0
0
113783
26.5500
C103
S
12
13
0
3
Saundercock, Mr. William Henry
male
20.0
0
0
A/5. 2151
8.0500
NaN
S
13
14
0
3
Andersson, Mr. Anders Johan
male
39.0
1
5
347082
31.2750
NaN
S
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.0
0
0
350406
7.8542
NaN
S
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.0
0
0
248706
16.0000
NaN
S
16
17
0
3
Rice, Master. Eugene
male
2.0
4
1
382652
29.1250
NaN
Q
17
18
1
2
Williams, Mr. Charles Eugene
male
NaN
0
0
244373
13.0000
NaN
S
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.0
1
0
345763
18.0000
NaN
S
19
20
1
3
Masselmani, Mrs. Fatima
female
NaN
0
0
2649
7.2250
NaN
C
20
21
0
2
Fynney, Mr. Joseph J
male
35.0
0
0
239865
26.0000
NaN
S
21
22
1
2
Beesley, Mr. Lawrence
male
34.0
0
0
248698
13.0000
D56
S
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.0
0
0
330923
8.0292
NaN
Q
23
24
1
1
Sloper, Mr. William Thompson
male
28.0
0
0
113788
35.5000
A6
S
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.0
3
1
349909
21.0750
NaN
S
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.0
1
5
347077
31.3875
NaN
S
26
27
0
3
Emir, Mr. Farred Chehab
male
NaN
0
0
2631
7.2250
NaN
C
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.0
3
2
19950
263.0000
C23 C25 C27
S
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
NaN
0
0
330959
7.8792
NaN
Q
29
30
0
3
Todoroff, Mr. Lalio
male
NaN
0
0
349216
7.8958
NaN
S
...
...
...
...
...
...
...
...
...
...
...
...
...
861
862
0
2
Giles, Mr. Frederick Edward
male
21.0
1
0
28134
11.5000
NaN
S
862
863
1
1
Swift, Mrs. Frederick Joel (Margaret Welles Ba...
female
48.0
0
0
17466
25.9292
D17
S
863
864
0
3
Sage, Miss. Dorothy Edith "Dolly"
female
NaN
8
2
CA. 2343
69.5500
NaN
S
864
865
0
2
Gill, Mr. John William
male
24.0
0
0
233866
13.0000
NaN
S
865
866
1
2
Bystrom, Mrs. (Karolina)
female
42.0
0
0
236852
13.0000
NaN
S
866
867
1
2
Duran y More, Miss. Asuncion
female
27.0
1
0
SC/PARIS 2149
13.8583
NaN
C
867
868
0
1
Roebling, Mr. Washington Augustus II
male
31.0
0
0
PC 17590
50.4958
A24
S
868
869
0
3
van Melkebeke, Mr. Philemon
male
NaN
0
0
345777
9.5000
NaN
S
869
870
1
3
Johnson, Master. Harold Theodor
male
4.0
1
1
347742
11.1333
NaN
S
870
871
0
3
Balkic, Mr. Cerin
male
26.0
0
0
349248
7.8958
NaN
S
871
872
1
1
Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
female
47.0
1
1
11751
52.5542
D35
S
872
873
0
1
Carlsson, Mr. Frans Olof
male
33.0
0
0
695
5.0000
B51 B53 B55
S
873
874
0
3
Vander Cruyssen, Mr. Victor
male
47.0
0
0
345765
9.0000
NaN
S
874
875
1
2
Abelson, Mrs. Samuel (Hannah Wizosky)
female
28.0
1
0
P/PP 3381
24.0000
NaN
C
875
876
1
3
Najib, Miss. Adele Kiamie "Jane"
female
15.0
0
0
2667
7.2250
NaN
C
876
877
0
3
Gustafsson, Mr. Alfred Ossian
male
20.0
0
0
7534
9.8458
NaN
S
877
878
0
3
Petroff, Mr. Nedelio
male
19.0
0
0
349212
7.8958
NaN
S
878
879
0
3
Laleff, Mr. Kristo
male
NaN
0
0
349217
7.8958
NaN
S
879
880
1
1
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
female
56.0
0
1
11767
83.1583
C50
C
880
881
1
2
Shelley, Mrs. William (Imanita Parrish Hall)
female
25.0
0
1
230433
26.0000
NaN
S
881
882
0
3
Markun, Mr. Johann
male
33.0
0
0
349257
7.8958
NaN
S
882
883
0
3
Dahlberg, Miss. Gerda Ulrika
female
22.0
0
0
7552
10.5167
NaN
S
883
884
0
2
Banfield, Mr. Frederick James
male
28.0
0
0
C.A./SOTON 34068
10.5000
NaN
S
884
885
0
3
Sutehall, Mr. Henry Jr
male
25.0
0
0
SOTON/OQ 392076
7.0500
NaN
S
885
886
0
3
Rice, Mrs. William (Margaret Norton)
female
39.0
0
5
382652
29.1250
NaN
Q
886
887
0
2
Montvila, Rev. Juozas
male
27.0
0
0
211536
13.0000
NaN
S
887
888
1
1
Graham, Miss. Margaret Edith
female
19.0
0
0
112053
30.0000
B42
S
888
889
0
3
Johnston, Miss. Catherine Helen "Carrie"
female
NaN
1
2
W./C. 6607
23.4500
NaN
S
889
890
1
1
Behr, Mr. Karl Howell
male
26.0
0
0
111369
30.0000
C148
C
890
891
0
3
Dooley, Mr. Patrick
male
32.0
0
0
370376
7.7500
NaN
Q
891 rows × 12 columns
In [57]:
from sklearn.ensemble import RandomForestRegressor
def set_missing_ages(df):
age_df = df[['Age','Fare','Parch','SibSp','Pclass']]
known_age = age_df[age_df.Age.notnull()].as_matrix()
unknown_age = age_df[age_df.Age.isnull()].as_matrix()
y = known_age[:,0]
X = known_age[:,1:]
rfr = RandomForestRegressor(random_state=0,n_estimators=2000,n_jobs=-1)
rfr.fit(X,y)
predictedAges = rfr.predict(unknown_age[:,1::])
df.loc[(df.Age.isnull()),'Age'] = predictedAges
return df,rfr
def set_Cabin_type(df):
df.loc[(df.Cabin.notnull()),'Cabin'] = 'Yes'
df.loc[(df.Cabin.isnull()),'Cabin'] = 'No'
return df
In [58]:
data_train, rfr = set_missing_ages(data_train)
data_train = set_Cabin_type(data_train)
In [59]:
data_train
Out[59]:
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
0
1
0
3
Braund, Mr. Owen Harris
male
22.000000
1
0
A/5 21171
7.2500
No
S
1
2
1
1
Cumings, Mrs. John Bradley (Florence Briggs Th...
female
38.000000
1
0
PC 17599
71.2833
Yes
C
2
3
1
3
Heikkinen, Miss. Laina
female
26.000000
0
0
STON/O2. 3101282
7.9250
No
S
3
4
1
1
Futrelle, Mrs. Jacques Heath (Lily May Peel)
female
35.000000
1
0
113803
53.1000
Yes
S
4
5
0
3
Allen, Mr. William Henry
male
35.000000
0
0
373450
8.0500
No
S
5
6
0
3
Moran, Mr. James
male
23.828953
0
0
330877
8.4583
No
Q
6
7
0
1
McCarthy, Mr. Timothy J
male
54.000000
0
0
17463
51.8625
Yes
S
7
8
0
3
Palsson, Master. Gosta Leonard
male
2.000000
3
1
349909
21.0750
No
S
8
9
1
3
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
female
27.000000
0
2
347742
11.1333
No
S
9
10
1
2
Nasser, Mrs. Nicholas (Adele Achem)
female
14.000000
1
0
237736
30.0708
No
C
10
11
1
3
Sandstrom, Miss. Marguerite Rut
female
4.000000
1
1
PP 9549
16.7000
Yes
S
11
12
1
1
Bonnell, Miss. Elizabeth
female
58.000000
0
0
113783
26.5500
Yes
S
12
13
0
3
Saundercock, Mr. William Henry
male
20.000000
0
0
A/5. 2151
8.0500
No
S
13
14
0
3
Andersson, Mr. Anders Johan
male
39.000000
1
5
347082
31.2750
No
S
14
15
0
3
Vestrom, Miss. Hulda Amanda Adolfina
female
14.000000
0
0
350406
7.8542
No
S
15
16
1
2
Hewlett, Mrs. (Mary D Kingcome)
female
55.000000
0
0
248706
16.0000
No
S
16
17
0
3
Rice, Master. Eugene
male
2.000000
4
1
382652
29.1250
No
Q
17
18
1
2
Williams, Mr. Charles Eugene
male
32.066493
0
0
244373
13.0000
No
S
18
19
0
3
Vander Planke, Mrs. Julius (Emelia Maria Vande...
female
31.000000
1
0
345763
18.0000
No
S
19
20
1
3
Masselmani, Mrs. Fatima
female
29.518205
0
0
2649
7.2250
No
C
20
21
0
2
Fynney, Mr. Joseph J
male
35.000000
0
0
239865
26.0000
No
S
21
22
1
2
Beesley, Mr. Lawrence
male
34.000000
0
0
248698
13.0000
Yes
S
22
23
1
3
McGowan, Miss. Anna "Annie"
female
15.000000
0
0
330923
8.0292
No
Q
23
24
1
1
Sloper, Mr. William Thompson
male
28.000000
0
0
113788
35.5000
Yes
S
24
25
0
3
Palsson, Miss. Torborg Danira
female
8.000000
3
1
349909
21.0750
No
S
25
26
1
3
Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
female
38.000000
1
5
347077
31.3875
No
S
26
27
0
3
Emir, Mr. Farred Chehab
male
29.518205
0
0
2631
7.2250
No
C
27
28
0
1
Fortune, Mr. Charles Alexander
male
19.000000
3
2
19950
263.0000
Yes
S
28
29
1
3
O'Dwyer, Miss. Ellen "Nellie"
female
22.380113
0
0
330959
7.8792
No
Q
29
30
0
3
Todoroff, Mr. Lalio
male
27.947206
0
0
349216
7.8958
No
S
...
...
...
...
...
...
...
...
...
...
...
...
...
861
862
0
2
Giles, Mr. Frederick Edward
male
21.000000
1
0
28134
11.5000
No
S
862
863
1
1
Swift, Mrs. Frederick Joel (Margaret Welles Ba...
female
48.000000
0
0
17466
25.9292
Yes
S
863
864
0
3
Sage, Miss. Dorothy Edith "Dolly"
female
10.869867
8
2
CA. 2343
69.5500
No
S
864
865
0
2
Gill, Mr. John William
male
24.000000
0
0
233866
13.0000
No
S
865
866
1
2
Bystrom, Mrs. (Karolina)
female
42.000000
0
0
236852
13.0000
No
S
866
867
1
2
Duran y More, Miss. Asuncion
female
27.000000
1
0
SC/PARIS 2149
13.8583
No
C
867
868
0
1
Roebling, Mr. Washington Augustus II
male
31.000000
0
0
PC 17590
50.4958
Yes
S
868
869
0
3
van Melkebeke, Mr. Philemon
male
25.977889
0
0
345777
9.5000
No
S
869
870
1
3
Johnson, Master. Harold Theodor
male
4.000000
1
1
347742
11.1333
No
S
870
871
0
3
Balkic, Mr. Cerin
male
26.000000
0
0
349248
7.8958
No
S
871
872
1
1
Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
female
47.000000
1
1
11751
52.5542
Yes
S
872
873
0
1
Carlsson, Mr. Frans Olof
male
33.000000
0
0
695
5.0000
Yes
S
873
874
0
3
Vander Cruyssen, Mr. Victor
male
47.000000
0
0
345765
9.0000
No
S
874
875
1
2
Abelson, Mrs. Samuel (Hannah Wizosky)
female
28.000000
1
0
P/PP 3381
24.0000
No
C
875
876
1
3
Najib, Miss. Adele Kiamie "Jane"
female
15.000000
0
0
2667
7.2250
No
C
876
877
0
3
Gustafsson, Mr. Alfred Ossian
male
20.000000
0
0
7534
9.8458
No
S
877
878
0
3
Petroff, Mr. Nedelio
male
19.000000
0
0
349212
7.8958
No
S
878
879
0
3
Laleff, Mr. Kristo
male
27.947206
0
0
349217
7.8958
No
S
879
880
1
1
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
female
56.000000
0
1
11767
83.1583
Yes
C
880
881
1
2
Shelley, Mrs. William (Imanita Parrish Hall)
female
25.000000
0
1
230433
26.0000
No
S
881
882
0
3
Markun, Mr. Johann
male
33.000000
0
0
349257
7.8958
No
S
882
883
0
3
Dahlberg, Miss. Gerda Ulrika
female
22.000000
0
0
7552
10.5167
No
S
883
884
0
2
Banfield, Mr. Frederick James
male
28.000000
0
0
C.A./SOTON 34068
10.5000
No
S
884
885
0
3
Sutehall, Mr. Henry Jr
male
25.000000
0
0
SOTON/OQ 392076
7.0500
No
S
885
886
0
3
Rice, Mrs. William (Margaret Norton)
female
39.000000
0
5
382652
29.1250
No
Q
886
887
0
2
Montvila, Rev. Juozas
male
27.000000
0
0
211536
13.0000
No
S
887
888
1
1
Graham, Miss. Margaret Edith
female
19.000000
0
0
112053
30.0000
Yes
S
888
889
0
3
Johnston, Miss. Catherine Helen "Carrie"
female
16.127950
1
2
W./C. 6607
23.4500
No
S
889
890
1
1
Behr, Mr. Karl Howell
male
26.000000
0
0
111369
30.0000
Yes
C
890
891
0
3
Dooley, Mr. Patrick
male
32.000000
0
0
370376
7.7500
No
Q
891 rows × 12 columns
In [60]:
dummies_Cabin = pd.get_dummies(data_train["Cabin"],prefix='Cabin')
dummies_Embarked = pd.get_dummies(data_train['Embarked'],prefix='Embarked')
dummies_Sex = pd.get_dummies(data_train['Sex'],prefix='Sex')
dummies_Pclass = pd.get_dummies(data_train['Pclass'],prefix='Pclass')
df = pd.concat([data_train,dummies_Cabin,dummies_Embarked,dummies_Pclass,dummies_Sex],axis = 1)
df.drop(['Cabin','Pclass','Sex','Name','Ticket','Embarked'],axis = 1,inplace=True)
In [61]:
df
Out[61]:
PassengerId
Survived
Age
SibSp
Parch
Fare
Cabin_No
Cabin_Yes
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Sex_female
Sex_male
0
1
0
22.000000
1
0
7.2500
1
0
0
0
1
0
0
1
0
1
1
2
1
38.000000
1
0
71.2833
0
1
1
0
0
1
0
0
1
0
2
3
1
26.000000
0
0
7.9250
1
0
0
0
1
0
0
1
1
0
3
4
1
35.000000
1
0
53.1000
0
1
0
0
1
1
0
0
1
0
4
5
0
35.000000
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
5
6
0
23.828953
0
0
8.4583
1
0
0
1
0
0
0
1
0
1
6
7
0
54.000000
0
0
51.8625
0
1
0
0
1
1
0
0
0
1
7
8
0
2.000000
3
1
21.0750
1
0
0
0
1
0
0
1
0
1
8
9
1
27.000000
0
2
11.1333
1
0
0
0
1
0
0
1
1
0
9
10
1
14.000000
1
0
30.0708
1
0
1
0
0
0
1
0
1
0
10
11
1
4.000000
1
1
16.7000
0
1
0
0
1
0
0
1
1
0
11
12
1
58.000000
0
0
26.5500
0
1
0
0
1
1
0
0
1
0
12
13
0
20.000000
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
13
14
0
39.000000
1
5
31.2750
1
0
0
0
1
0
0
1
0
1
14
15
0
14.000000
0
0
7.8542
1
0
0
0
1
0
0
1
1
0
15
16
1
55.000000
0
0
16.0000
1
0
0
0
1
0
1
0
1
0
16
17
0
2.000000
4
1
29.1250
1
0
0
1
0
0
0
1
0
1
17
18
1
32.066493
0
0
13.0000
1
0
0
0
1
0
1
0
0
1
18
19
0
31.000000
1
0
18.0000
1
0
0
0
1
0
0
1
1
0
19
20
1
29.518205
0
0
7.2250
1
0
1
0
0
0
0
1
1
0
20
21
0
35.000000
0
0
26.0000
1
0
0
0
1
0
1
0
0
1
21
22
1
34.000000
0
0
13.0000
0
1
0
0
1
0
1
0
0
1
22
23
1
15.000000
0
0
8.0292
1
0
0
1
0
0
0
1
1
0
23
24
1
28.000000
0
0
35.5000
0
1
0
0
1
1
0
0
0
1
24
25
0
8.000000
3
1
21.0750
1
0
0
0
1
0
0
1
1
0
25
26
1
38.000000
1
5
31.3875
1
0
0
0
1
0
0
1
1
0
26
27
0
29.518205
0
0
7.2250
1
0
1
0
0
0
0
1
0
1
27
28
0
19.000000
3
2
263.0000
0
1
0
0
1
1
0
0
0
1
28
29
1
22.380113
0
0
7.8792
1
0
0
1
0
0
0
1
1
0
29
30
0
27.947206
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
861
862
0
21.000000
1
0
11.5000
1
0
0
0
1
0
1
0
0
1
862
863
1
48.000000
0
0
25.9292
0
1
0
0
1
1
0
0
1
0
863
864
0
10.869867
8
2
69.5500
1
0
0
0
1
0
0
1
1
0
864
865
0
24.000000
0
0
13.0000
1
0
0
0
1
0
1
0
0
1
865
866
1
42.000000
0
0
13.0000
1
0
0
0
1
0
1
0
1
0
866
867
1
27.000000
1
0
13.8583
1
0
1
0
0
0
1
0
1
0
867
868
0
31.000000
0
0
50.4958
0
1
0
0
1
1
0
0
0
1
868
869
0
25.977889
0
0
9.5000
1
0
0
0
1
0
0
1
0
1
869
870
1
4.000000
1
1
11.1333
1
0
0
0
1
0
0
1
0
1
870
871
0
26.000000
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
871
872
1
47.000000
1
1
52.5542
0
1
0
0
1
1
0
0
1
0
872
873
0
33.000000
0
0
5.0000
0
1
0
0
1
1
0
0
0
1
873
874
0
47.000000
0
0
9.0000
1
0
0
0
1
0
0
1
0
1
874
875
1
28.000000
1
0
24.0000
1
0
1
0
0
0
1
0
1
0
875
876
1
15.000000
0
0
7.2250
1
0
1
0
0
0
0
1
1
0
876
877
0
20.000000
0
0
9.8458
1
0
0
0
1
0
0
1
0
1
877
878
0
19.000000
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
878
879
0
27.947206
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
879
880
1
56.000000
0
1
83.1583
0
1
1
0
0
1
0
0
1
0
880
881
1
25.000000
0
1
26.0000
1
0
0
0
1
0
1
0
1
0
881
882
0
33.000000
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
882
883
0
22.000000
0
0
10.5167
1
0
0
0
1
0
0
1
1
0
883
884
0
28.000000
0
0
10.5000
1
0
0
0
1
0
1
0
0
1
884
885
0
25.000000
0
0
7.0500
1
0
0
0
1
0
0
1
0
1
885
886
0
39.000000
0
5
29.1250
1
0
0
1
0
0
0
1
1
0
886
887
0
27.000000
0
0
13.0000
1
0
0
0
1
0
1
0
0
1
887
888
1
19.000000
0
0
30.0000
0
1
0
0
1
1
0
0
1
0
888
889
0
16.127950
1
2
23.4500
1
0
0
0
1
0
0
1
1
0
889
890
1
26.000000
0
0
30.0000
0
1
1
0
0
1
0
0
0
1
890
891
0
32.000000
0
0
7.7500
1
0
0
1
0
0
0
1
0
1
891 rows × 16 columns
In [62]:
import sklearn.preprocessing as preprocessing
scaler = preprocessing.StandardScaler()
age_scale_param = scaler.fit(df['Age'])
df['Age_scaled'] = scaler.fit_transform(df['Age'],age_scale_param)
fare_scale_param = scaler.fit(df['Fare'])
df['Fare_scaled']= scaler.fit_transform(df['Fare'],fare_scale_param)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
In [63]:
df
Out[63]:
PassengerId
Survived
Age
SibSp
Parch
Fare
Cabin_No
Cabin_Yes
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Sex_female
Sex_male
Age_scaled
Fare_scaled
0
1
0
22.000000
1
0
7.2500
1
0
0
0
1
0
0
1
0
1
-0.561363
-0.502445
1
2
1
38.000000
1
0
71.2833
0
1
1
0
0
1
0
0
1
0
0.613182
0.786845
2
3
1
26.000000
0
0
7.9250
1
0
0
0
1
0
0
1
1
0
-0.267727
-0.488854
3
4
1
35.000000
1
0
53.1000
0
1
0
0
1
1
0
0
1
0
0.392955
0.420730
4
5
0
35.000000
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
0.392955
-0.486337
5
6
0
23.828953
0
0
8.4583
1
0
0
1
0
0
0
1
0
1
-0.427102
-0.478116
6
7
0
54.000000
0
0
51.8625
0
1
0
0
1
1
0
0
0
1
1.787727
0.395814
7
8
0
2.000000
3
1
21.0750
1
0
0
0
1
0
0
1
0
1
-2.029545
-0.224083
8
9
1
27.000000
0
2
11.1333
1
0
0
0
1
0
0
1
1
0
-0.194318
-0.424256
9
10
1
14.000000
1
0
30.0708
1
0
1
0
0
0
1
0
1
0
-1.148636
-0.042956
10
11
1
4.000000
1
1
16.7000
0
1
0
0
1
0
0
1
1
0
-1.882726
-0.312172
11
12
1
58.000000
0
0
26.5500
0
1
0
0
1
1
0
0
1
0
2.081363
-0.113846
12
13
0
20.000000
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
-0.708181
-0.486337
13
14
0
39.000000
1
5
31.2750
1
0
0
0
1
0
0
1
0
1
0.686591
-0.018709
14
15
0
14.000000
0
0
7.8542
1
0
0
0
1
0
0
1
1
0
-1.148636
-0.490280
15
16
1
55.000000
0
0
16.0000
1
0
0
0
1
0
1
0
1
0
1.861136
-0.326267
16
17
0
2.000000
4
1
29.1250
1
0
0
1
0
0
0
1
0
1
-2.029545
-0.061999
17
18
1
32.066493
0
0
13.0000
1
0
0
0
1
0
1
0
0
1
0.177609
-0.386671
18
19
0
31.000000
1
0
18.0000
1
0
0
0
1
0
0
1
1
0
0.099318
-0.285997
19
20
1
29.518205
0
0
7.2250
1
0
1
0
0
0
0
1
1
0
-0.009459
-0.502949
20
21
0
35.000000
0
0
26.0000
1
0
0
0
1
0
1
0
0
1
0.392955
-0.124920
21
22
1
34.000000
0
0
13.0000
0
1
0
0
1
0
1
0
0
1
0.319546
-0.386671
22
23
1
15.000000
0
0
8.0292
1
0
0
1
0
0
0
1
1
0
-1.075227
-0.486756
23
24
1
28.000000
0
0
35.5000
0
1
0
0
1
1
0
0
0
1
-0.120909
0.066360
24
25
0
8.000000
3
1
21.0750
1
0
0
0
1
0
0
1
1
0
-1.589090
-0.224083
25
26
1
38.000000
1
5
31.3875
1
0
0
0
1
0
0
1
1
0
0.613182
-0.016444
26
27
0
29.518205
0
0
7.2250
1
0
1
0
0
0
0
1
0
1
-0.009459
-0.502949
27
28
0
19.000000
3
2
263.0000
0
1
0
0
1
1
0
0
0
1
-0.781590
4.647001
28
29
1
22.380113
0
0
7.8792
1
0
0
1
0
0
0
1
1
0
-0.533459
-0.489776
29
30
0
27.947206
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
-0.124784
-0.489442
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
861
862
0
21.000000
1
0
11.5000
1
0
0
0
1
0
1
0
0
1
-0.634772
-0.416873
862
863
1
48.000000
0
0
25.9292
0
1
0
0
1
1
0
0
1
0
1.347272
-0.126345
863
864
0
10.869867
8
2
69.5500
1
0
0
0
1
0
0
1
1
0
-1.378416
0.751946
864
865
0
24.000000
0
0
13.0000
1
0
0
0
1
0
1
0
0
1
-0.414545
-0.386671
865
866
1
42.000000
0
0
13.0000
1
0
0
0
1
0
1
0
1
0
0.906818
-0.386671
866
867
1
27.000000
1
0
13.8583
1
0
1
0
0
0
1
0
1
0
-0.194318
-0.369389
867
868
0
31.000000
0
0
50.4958
0
1
0
0
1
1
0
0
0
1
0.099318
0.368295
868
869
0
25.977889
0
0
9.5000
1
0
0
0
1
0
0
1
0
1
-0.269350
-0.457142
869
870
1
4.000000
1
1
11.1333
1
0
0
0
1
0
0
1
0
1
-1.882726
-0.424256
870
871
0
26.000000
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
-0.267727
-0.489442
871
872
1
47.000000
1
1
52.5542
0
1
0
0
1
1
0
0
1
0
1.273863
0.409741
872
873
0
33.000000
0
0
5.0000
0
1
0
0
1
1
0
0
0
1
0.246136
-0.547748
873
874
0
47.000000
0
0
9.0000
1
0
0
0
1
0
0
1
0
1
1.273863
-0.467209
874
875
1
28.000000
1
0
24.0000
1
0
1
0
0
0
1
0
1
0
-0.120909
-0.165189
875
876
1
15.000000
0
0
7.2250
1
0
1
0
0
0
0
1
1
0
-1.075227
-0.502949
876
877
0
20.000000
0
0
9.8458
1
0
0
0
1
0
0
1
0
1
-0.708181
-0.450180
877
878
0
19.000000
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
-0.781590
-0.489442
878
879
0
27.947206
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
-0.124784
-0.489442
879
880
1
56.000000
0
1
83.1583
0
1
1
0
0
1
0
0
1
0
1.934545
1.025945
880
881
1
25.000000
0
1
26.0000
1
0
0
0
1
0
1
0
1
0
-0.341136
-0.124920
881
882
0
33.000000
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
0.246136
-0.489442
882
883
0
22.000000
0
0
10.5167
1
0
0
0
1
0
0
1
1
0
-0.561363
-0.436671
883
884
0
28.000000
0
0
10.5000
1
0
0
0
1
0
1
0
0
1
-0.120909
-0.437007
884
885
0
25.000000
0
0
7.0500
1
0
0
0
1
0
0
1
0
1
-0.341136
-0.506472
885
886
0
39.000000
0
5
29.1250
1
0
0
1
0
0
0
1
1
0
0.686591
-0.061999
886
887
0
27.000000
0
0
13.0000
1
0
0
0
1
0
1
0
0
1
-0.194318
-0.386671
887
888
1
19.000000
0
0
30.0000
0
1
0
0
1
1
0
0
1
0
-0.781590
-0.044381
888
889
0
16.127950
1
2
23.4500
1
0
0
0
1
0
0
1
1
0
-0.992425
-0.176263
889
890
1
26.000000
0
0
30.0000
0
1
1
0
0
1
0
0
0
1
-0.267727
-0.044381
890
891
0
32.000000
0
0
7.7500
1
0
0
1
0
0
0
1
0
1
0.172727
-0.492378
891 rows × 18 columns
In [85]:
from sklearn import linear_model
train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
train_np = train_df.as_matrix()
y = train_np[:,0]
X = train_np[:,1:]
clf = linear_model.LogisticRegression(C=1.0,penalty='l1',tol=1e-6)
clf.fit(X,y)
Out[85]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
penalty='l1', random_state=None, solver='liblinear', tol=1e-06,
verbose=0, warm_start=False)
In [87]:
train_df
Out[87]:
Survived
SibSp
Parch
Cabin_No
Cabin_Yes
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Sex_female
Sex_male
Age_scaled
Fare_scaled
0
0
1
0
1
0
0
0
1
0
0
1
0
1
-0.561363
-0.502445
1
1
1
0
0
1
1
0
0
1
0
0
1
0
0.613182
0.786845
2
1
0
0
1
0
0
0
1
0
0
1
1
0
-0.267727
-0.488854
3
1
1
0
0
1
0
0
1
1
0
0
1
0
0.392955
0.420730
4
0
0
0
1
0
0
0
1
0
0
1
0
1
0.392955
-0.486337
5
0
0
0
1
0
0
1
0
0
0
1
0
1
-0.427102
-0.478116
6
0
0
0
0
1
0
0
1
1
0
0
0
1
1.787727
0.395814
7
0
3
1
1
0
0
0
1
0
0
1
0
1
-2.029545
-0.224083
8
1
0
2
1
0
0
0
1
0
0
1
1
0
-0.194318
-0.424256
9
1
1
0
1
0
1
0
0
0
1
0
1
0
-1.148636
-0.042956
10
1
1
1
0
1
0
0
1
0
0
1
1
0
-1.882726
-0.312172
11
1
0
0
0
1
0
0
1
1
0
0
1
0
2.081363
-0.113846
12
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.708181
-0.486337
13
0
1
5
1
0
0
0
1
0
0
1
0
1
0.686591
-0.018709
14
0
0
0
1
0
0
0
1
0
0
1
1
0
-1.148636
-0.490280
15
1
0
0
1
0
0
0
1
0
1
0
1
0
1.861136
-0.326267
16
0
4
1
1
0
0
1
0
0
0
1
0
1
-2.029545
-0.061999
17
1
0
0
1
0
0
0
1
0
1
0
0
1
0.177609
-0.386671
18
0
1
0
1
0
0
0
1
0
0
1
1
0
0.099318
-0.285997
19
1
0
0
1
0
1
0
0
0
0
1
1
0
-0.009459
-0.502949
20
0
0
0
1
0
0
0
1
0
1
0
0
1
0.392955
-0.124920
21
1
0
0
0
1
0
0
1
0
1
0
0
1
0.319546
-0.386671
22
1
0
0
1
0
0
1
0
0
0
1
1
0
-1.075227
-0.486756
23
1
0
0
0
1
0
0
1
1
0
0
0
1
-0.120909
0.066360
24
0
3
1
1
0
0
0
1
0
0
1
1
0
-1.589090
-0.224083
25
1
1
5
1
0
0
0
1
0
0
1
1
0
0.613182
-0.016444
26
0
0
0
1
0
1
0
0
0
0
1
0
1
-0.009459
-0.502949
27
0
3
2
0
1
0
0
1
1
0
0
0
1
-0.781590
4.647001
28
1
0
0
1
0
0
1
0
0
0
1
1
0
-0.533459
-0.489776
29
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.124784
-0.489442
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
861
0
1
0
1
0
0
0
1
0
1
0
0
1
-0.634772
-0.416873
862
1
0
0
0
1
0
0
1
1
0
0
1
0
1.347272
-0.126345
863
0
8
2
1
0
0
0
1
0
0
1
1
0
-1.378416
0.751946
864
0
0
0
1
0
0
0
1
0
1
0
0
1
-0.414545
-0.386671
865
1
0
0
1
0
0
0
1
0
1
0
1
0
0.906818
-0.386671
866
1
1
0
1
0
1
0
0
0
1
0
1
0
-0.194318
-0.369389
867
0
0
0
0
1
0
0
1
1
0
0
0
1
0.099318
0.368295
868
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.269350
-0.457142
869
1
1
1
1
0
0
0
1
0
0
1
0
1
-1.882726
-0.424256
870
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.267727
-0.489442
871
1
1
1
0
1
0
0
1
1
0
0
1
0
1.273863
0.409741
872
0
0
0
0
1
0
0
1
1
0
0
0
1
0.246136
-0.547748
873
0
0
0
1
0
0
0
1
0
0
1
0
1
1.273863
-0.467209
874
1
1
0
1
0
1
0
0
0
1
0
1
0
-0.120909
-0.165189
875
1
0
0
1
0
1
0
0
0
0
1
1
0
-1.075227
-0.502949
876
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.708181
-0.450180
877
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.781590
-0.489442
878
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.124784
-0.489442
879
1
0
1
0
1
1
0
0
1
0
0
1
0
1.934545
1.025945
880
1
0
1
1
0
0
0
1
0
1
0
1
0
-0.341136
-0.124920
881
0
0
0
1
0
0
0
1
0
0
1
0
1
0.246136
-0.489442
882
0
0
0
1
0
0
0
1
0
0
1
1
0
-0.561363
-0.436671
883
0
0
0
1
0
0
0
1
0
1
0
0
1
-0.120909
-0.437007
884
0
0
0
1
0
0
0
1
0
0
1
0
1
-0.341136
-0.506472
885
0
0
5
1
0
0
1
0
0
0
1
1
0
0.686591
-0.061999
886
0
0
0
1
0
0
0
1
0
1
0
0
1
-0.194318
-0.386671
887
1
0
0
0
1
0
0
1
1
0
0
1
0
-0.781590
-0.044381
888
0
1
2
1
0
0
0
1
0
0
1
1
0
-0.992425
-0.176263
889
1
0
0
0
1
1
0
0
1
0
0
0
1
-0.267727
-0.044381
890
0
0
0
1
0
0
1
0
0
0
1
0
1
0.172727
-0.492378
891 rows × 15 columns
In [88]:
data_test= pd.read_csv("test.csv")
data_test.loc[(data_test.Fare.isnull()),'Fare']= 0
tmp_df = data_test[['Age','Fare','Parch','SibSp','Pclass']]
null_age = tmp_df[data_test.Age.isnull()].as_matrix()
X = null_age[:,1:]
predictedAges = rfr.predict(X)
data_test.loc[(data_test.Age.isnull()),'Age'] = predictedAges
data_test = set_Cabin_type(data_test)
dummies_Cabin = pd.get_dummies(data_test["Cabin"],prefix='Cabin')
dummies_Embarked = pd.get_dummies(data_test['Embarked'],prefix='Embarked')
dummies_Sex = pd.get_dummies(data_test['Sex'],prefix='Sex')
dummies_Pclass = pd.get_dummies(data_test['Pclass'],prefix='Pclass')
df_test = pd.concat([data_test,dummies_Cabin,dummies_Embarked,dummies_Pclass,dummies_Sex],axis = 1)
df_test.drop(['Cabin','Pclass','Sex','Name','Ticket','Embarked'],axis = 1,inplace=True)
In [89]:
df_test
Out[89]:
PassengerId
Age
SibSp
Parch
Fare
Cabin_No
Cabin_Yes
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Sex_female
Sex_male
0
892
34.500000
0
0
7.8292
1
0
0
1
0
0
0
1
0
1
1
893
47.000000
1
0
7.0000
1
0
0
0
1
0
0
1
1
0
2
894
62.000000
0
0
9.6875
1
0
0
1
0
0
1
0
0
1
3
895
27.000000
0
0
8.6625
1
0
0
0
1
0
0
1
0
1
4
896
22.000000
1
1
12.2875
1
0
0
0
1
0
0
1
1
0
5
897
14.000000
0
0
9.2250
1
0
0
0
1
0
0
1
0
1
6
898
30.000000
0
0
7.6292
1
0
0
1
0
0
0
1
1
0
7
899
26.000000
1
1
29.0000
1
0
0
0
1
0
1
0
0
1
8
900
18.000000
0
0
7.2292
1
0
1
0
0
0
0
1
1
0
9
901
21.000000
2
0
24.1500
1
0
0
0
1
0
0
1
0
1
10
902
27.947206
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
11
903
46.000000
0
0
26.0000
1
0
0
0
1
1
0
0
0
1
12
904
23.000000
1
0
82.2667
0
1
0
0
1
1
0
0
1
0
13
905
63.000000
1
0
26.0000
1
0
0
0
1
0
1
0
0
1
14
906
47.000000
1
0
61.1750
0
1
0
0
1
1
0
0
1
0
15
907
24.000000
1
0
27.7208
1
0
1
0
0
0
1
0
1
0
16
908
35.000000
0
0
12.3500
1
0
0
1
0
0
1
0
0
1
17
909
21.000000
0
0
7.2250
1
0
1
0
0
0
0
1
0
1
18
910
27.000000
1
0
7.9250
1
0
0
0
1
0
0
1
1
0
19
911
45.000000
0
0
7.2250
1
0
1
0
0
0
0
1
1
0
20
912
55.000000
1
0
59.4000
1
0
1
0
0
1
0
0
0
1
21
913
9.000000
0
1
3.1708
1
0
0
0
1
0
0
1
0
1
22
914
52.314311
0
0
31.6833
1
0
0
0
1
1
0
0
1
0
23
915
21.000000
0
1
61.3792
1
0
1
0
0
1
0
0
0
1
24
916
48.000000
1
3
262.3750
0
1
1
0
0
1
0
0
1
0
25
917
50.000000
1
0
14.5000
1
0
0
0
1
0
0
1
0
1
26
918
22.000000
0
1
61.9792
0
1
1
0
0
1
0
0
1
0
27
919
22.500000
0
0
7.2250
1
0
1
0
0
0
0
1
0
1
28
920
41.000000
0
0
30.5000
0
1
0
0
1
1
0
0
0
1
29
921
23.458621
2
0
21.6792
1
0
1
0
0
0
0
1
0
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
388
1280
21.000000
0
0
7.7500
1
0
0
1
0
0
0
1
0
1
389
1281
6.000000
3
1
21.0750
1
0
0
0
1
0
0
1
0
1
390
1282
23.000000
0
0
93.5000
0
1
0
0
1
1
0
0
0
1
391
1283
51.000000
0
1
39.4000
0
1
0
0
1
1
0
0
1
0
392
1284
13.000000
0
2
20.2500
1
0
0
0
1
0
0
1
0
1
393
1285
47.000000
0
0
10.5000
1
0
0
0
1
0
1
0
0
1
394
1286
29.000000
3
1
22.0250
1
0
0
0
1
0
0
1
0
1
395
1287
18.000000
1
0
60.0000
0
1
0
0
1
1
0
0
1
0
396
1288
24.000000
0
0
7.2500
1
0
0
1
0
0
0
1
0
1
397
1289
48.000000
1
1
79.2000
0
1
1
0
0
1
0
0
1
0
398
1290
22.000000
0
0
7.7750
1
0
0
0
1
0
0
1
0
1
399
1291
31.000000
0
0
7.7333
1
0
0
1
0
0
0
1
0
1
400
1292
30.000000
0
0
164.8667
0
1
0
0
1
1
0
0
1
0
401
1293
38.000000
1
0
21.0000
1
0
0
0
1
0
1
0
0
1
402
1294
22.000000
0
1
59.4000
1
0
1
0
0
1
0
0
1
0
403
1295
17.000000
0
0
47.1000
1
0
0
0
1
1
0
0
0
1
404
1296
43.000000
1
0
27.7208
0
1
1
0
0
1
0
0
0
1
405
1297
20.000000
0
0
13.8625
0
1
1
0
0
0
1
0
0
1
406
1298
23.000000
1
0
10.5000
1
0
0
0
1
0
1
0
0
1
407
1299
50.000000
1
1
211.5000
0
1
1
0
0
1
0
0
0
1
408
1300
19.895581
0
0
7.7208
1
0
0
1
0
0
0
1
1
0
409
1301
3.000000
1
1
13.7750
1
0
0
0
1
0
0
1
1
0
410
1302
35.295824
0
0
7.7500
1
0
0
1
0
0
0
1
1
0
411
1303
37.000000
1
0
90.0000
0
1
0
1
0
1
0
0
1
0
412
1304
28.000000
0
0
7.7750
1
0
0
0
1
0
0
1
1
0
413
1305
30.705727
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
414
1306
39.000000
0
0
108.9000
0
1
1
0
0
1
0
0
1
0
415
1307
38.500000
0
0
7.2500
1
0
0
0
1
0
0
1
0
1
416
1308
30.705727
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
417
1309
25.793502
1
1
22.3583
1
0
1
0
0
0
0
1
0
1
418 rows × 15 columns
In [90]:
df_test['Age_scaled'] = scaler.fit_transform(df_test['Age'],age_scale_param)
df_test['Fare_scaled']= scaler.fit_transform(df_test['Fare'],fare_scale_param)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\Softwares\Anacoda\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
In [91]:
df_test
Out[91]:
PassengerId
Age
SibSp
Parch
Fare
Cabin_No
Cabin_Yes
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Sex_female
Sex_male
Age_scaled
Fare_scaled
0
892
34.500000
0
0
7.8292
1
0
0
1
0
0
0
1
0
1
0.307535
-0.496637
1
893
47.000000
1
0
7.0000
1
0
0
0
1
0
0
1
1
0
1.256230
-0.511497
2
894
62.000000
0
0
9.6875
1
0
0
1
0
0
1
0
0
1
2.394665
-0.463335
3
895
27.000000
0
0
8.6625
1
0
0
0
1
0
0
1
0
1
-0.261683
-0.481704
4
896
22.000000
1
1
12.2875
1
0
0
0
1
0
0
1
1
0
-0.641161
-0.416740
5
897
14.000000
0
0
9.2250
1
0
0
0
1
0
0
1
0
1
-1.248326
-0.471623
6
898
30.000000
0
0
7.6292
1
0
0
1
0
0
0
1
1
0
-0.033996
-0.500221
7
899
26.000000
1
1
29.0000
1
0
0
0
1
0
1
0
0
1
-0.337578
-0.117238
8
900
18.000000
0
0
7.2292
1
0
1
0
0
0
0
1
1
0
-0.944743
-0.507390
9
901
21.000000
2
0
24.1500
1
0
0
0
1
0
0
1
0
1
-0.717056
-0.204154
10
902
27.947206
0
0
7.8958
1
0
0
0
1
0
0
1
0
1
-0.189794
-0.495444
11
903
46.000000
0
0
26.0000
1
0
0
0
1
1
0
0
0
1
1.180334
-0.171000
12
904
23.000000
1
0
82.2667
0
1
0
0
1
1
0
0
1
0
-0.565265
0.837349
13
905
63.000000
1
0
26.0000
1
0
0
0
1
0
1
0
0
1
2.470560
-0.171000
14
906
47.000000
1
0
61.1750
0
1
0
0
1
1
0
0
1
0
1.256230
0.459367
15
907
24.000000
1
0
27.7208
1
0
1
0
0
0
1
0
1
0
-0.489370
-0.140162
16
908
35.000000
0
0
12.3500
1
0
0
1
0
0
1
0
0
1
0.345482
-0.415620
17
909
21.000000
0
0
7.2250
1
0
1
0
0
0
0
1
0
1
-0.717056
-0.507465
18
910
27.000000
1
0
7.9250
1
0
0
0
1
0
0
1
1
0
-0.261683
-0.494920
19
911
45.000000
0
0
7.2250
1
0
1
0
0
0
0
1
1
0
1.104439
-0.507465
20
912
55.000000
1
0
59.4000
1
0
1
0
0
1
0
0
0
1
1.863395
0.427557
21
913
9.000000
0
1
3.1708
1
0
0
0
1
0
0
1
0
1
-1.627804
-0.580120
22
914
52.314311
0
0
31.6833
1
0
0
0
1
1
0
0
1
0
1.659563
-0.069151
23
915
21.000000
0
1
61.3792
1
0
1
0
0
1
0
0
0
1
-0.717056
0.463026
24
916
48.000000
1
3
262.3750
0
1
1
0
0
1
0
0
1
0
1.332126
4.065049
25
917
50.000000
1
0
14.5000
1
0
0
0
1
0
0
1
0
1
1.483917
-0.377090
26
918
22.000000
0
1
61.9792
0
1
1
0
0
1
0
0
1
0
-0.641161
0.473779
27
919
22.500000
0
0
7.2250
1
0
1
0
0
0
0
1
0
1
-0.603213
-0.507465
28
920
41.000000
0
0
30.5000
0
1
0
0
1
1
0
0
0
1
0.800856
-0.090356
29
921
23.458621
2
0
21.6792
1
0
1
0
0
0
0
1
0
1
-0.530458
-0.248433
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
388
1280
21.000000
0
0
7.7500
1
0
0
1
0
0
0
1
0
1
-0.717056
-0.498056
389
1281
6.000000
3
1
21.0750
1
0
0
0
1
0
0
1
0
1
-1.855491
-0.259261
390
1282
23.000000
0
0
93.5000
0
1
0
0
1
1
0
0
0
1
-0.565265
1.038659
391
1283
51.000000
0
1
39.4000
0
1
0
0
1
1
0
0
1
0
1.559813
0.069140
392
1284
13.000000
0
2
20.2500
1
0
0
0
1
0
0
1
0
1
-1.324222
-0.274045
393
1285
47.000000
0
0
10.5000
1
0
0
0
1
0
1
0
0
1
1.256230
-0.448774
394
1286
29.000000
3
1
22.0250
1
0
0
0
1
0
0
1
0
1
-0.109891
-0.242236
395
1287
18.000000
1
0
60.0000
0
1
0
0
1
1
0
0
1
0
-0.944743
0.438310
396
1288
24.000000
0
0
7.2500
1
0
0
1
0
0
0
1
0
1
-0.489370
-0.507017
397
1289
48.000000
1
1
79.2000
0
1
1
0
0
1
0
0
1
0
1.332126
0.782391
398
1290
22.000000
0
0
7.7750
1
0
0
0
1
0
0
1
0
1
-0.641161
-0.497608
399
1291
31.000000
0
0
7.7333
1
0
0
1
0
0
0
1
0
1
0.041900
-0.498356
400
1292
30.000000
0
0
164.8667
0
1
0
0
1
1
0
0
1
0
-0.033996
2.317614
401
1293
38.000000
1
0
21.0000
1
0
0
0
1
0
1
0
0
1
0.573169
-0.260605
402
1294
22.000000
0
1
59.4000
1
0
1
0
0
1
0
0
1
0
-0.641161
0.427557
403
1295
17.000000
0
0
47.1000
1
0
0
0
1
1
0
0
0
1
-1.020639
0.207130
404
1296
43.000000
1
0
27.7208
0
1
1
0
0
1
0
0
0
1
0.952648
-0.140162
405
1297
20.000000
0
0
13.8625
0
1
1
0
0
0
1
0
0
1
-0.792952
-0.388515
406
1298
23.000000
1
0
10.5000
1
0
0
0
1
0
1
0
0
1
-0.565265
-0.448774
407
1299
50.000000
1
1
211.5000
0
1
1
0
0
1
0
0
0
1
1.483917
3.153324
408
1300
19.895581
0
0
7.7208
1
0
0
1
0
0
0
1
1
0
-0.800877
-0.498580
409
1301
3.000000
1
1
13.7750
1
0
0
0
1
0
0
1
1
0
-2.083178
-0.390083
410
1302
35.295824
0
0
7.7500
1
0
0
1
0
0
0
1
1
0
0.367934
-0.498056
411
1303
37.000000
1
0
90.0000
0
1
0
1
0
1
0
0
1
0
0.497274
0.975936
412
1304
28.000000
0
0
7.7750
1
0
0
0
1
0
0
1
1
0
-0.185787
-0.497608
413
1305
30.705727
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
0.019566
-0.492680
414
1306
39.000000
0
0
108.9000
0
1
1
0
0
1
0
0
1
0
0.649065
1.314641
415
1307
38.500000
0
0
7.2500
1
0
0
0
1
0
0
1
0
1
0.611117
-0.507017
416
1308
30.705727
0
0
8.0500
1
0
0
0
1
0
0
1
0
1
0.019566
-0.492680
417
1309
25.793502
1
1
22.3583
1
0
1
0
0
0
0
1
0
1
-0.353251
-0.236263
418 rows × 17 columns
In [92]:
test = df_test.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
predictions = clf.predict(test)
In [93]:
test
Out[93]:
SibSp
Parch
Cabin_No
Cabin_Yes
Embarked_C
Embarked_Q
Embarked_S
Pclass_1
Pclass_2
Pclass_3
Sex_female
Sex_male
Age_scaled
Fare_scaled
0
0
0
1
0
0
1
0
0
0
1
0
1
0.307535
-0.496637
1
1
0
1
0
0
0
1
0
0
1
1
0
1.256230
-0.511497
2
0
0
1
0
0
1
0
0
1
0
0
1
2.394665
-0.463335
3
0
0
1
0
0
0
1
0
0
1
0
1
-0.261683
-0.481704
4
1
1
1
0
0
0
1
0
0
1
1
0
-0.641161
-0.416740
5
0
0
1
0
0
0
1
0
0
1
0
1
-1.248326
-0.471623
6
0
0
1
0
0
1
0
0
0
1
1
0
-0.033996
-0.500221
7
1
1
1
0
0
0
1
0
1
0
0
1
-0.337578
-0.117238
8
0
0
1
0
1
0
0
0
0
1
1
0
-0.944743
-0.507390
9
2
0
1
0
0
0
1
0
0
1
0
1
-0.717056
-0.204154
10
0
0
1
0
0
0
1
0
0
1
0
1
-0.189794
-0.495444
11
0
0
1
0
0
0
1
1
0
0
0
1
1.180334
-0.171000
12
1
0
0
1
0
0
1
1
0
0
1
0
-0.565265
0.837349
13
1
0
1
0
0
0
1
0
1
0
0
1
2.470560
-0.171000
14
1
0
0
1
0
0
1
1
0
0
1
0
1.256230
0.459367
15
1
0
1
0
1
0
0
0
1
0
1
0
-0.489370
-0.140162
16
0
0
1
0
0
1
0
0
1
0
0
1
0.345482
-0.415620
17
0
0
1
0
1
0
0
0
0
1
0
1
-0.717056
-0.507465
18
1
0
1
0
0
0
1
0
0
1
1
0
-0.261683
-0.494920
19
0
0
1
0
1
0
0
0
0
1
1
0
1.104439
-0.507465
20
1
0
1
0
1
0
0
1
0
0
0
1
1.863395
0.427557
21
0
1
1
0
0
0
1
0
0
1
0
1
-1.627804
-0.580120
22
0
0
1
0
0
0
1
1
0
0
1
0
1.659563
-0.069151
23
0
1
1
0
1
0
0
1
0
0
0
1
-0.717056
0.463026
24
1
3
0
1
1
0
0
1
0
0
1
0
1.332126
4.065049
25
1
0
1
0
0
0
1
0
0
1
0
1
1.483917
-0.377090
26
0
1
0
1
1
0
0
1
0
0
1
0
-0.641161
0.473779
27
0
0
1
0
1
0
0
0
0
1
0
1
-0.603213
-0.507465
28
0
0
0
1
0
0
1
1
0
0
0
1
0.800856
-0.090356
29
2
0
1
0
1
0
0
0
0
1
0
1
-0.530458
-0.248433
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
388
0
0
1
0
0
1
0
0
0
1
0
1
-0.717056
-0.498056
389
3
1
1
0
0
0
1
0
0
1
0
1
-1.855491
-0.259261
390
0
0
0
1
0
0
1
1
0
0
0
1
-0.565265
1.038659
391
0
1
0
1
0
0
1
1
0
0
1
0
1.559813
0.069140
392
0
2
1
0
0
0
1
0
0
1
0
1
-1.324222
-0.274045
393
0
0
1
0
0
0
1
0
1
0
0
1
1.256230
-0.448774
394
3
1
1
0
0
0
1
0
0
1
0
1
-0.109891
-0.242236
395
1
0
0
1
0
0
1
1
0
0
1
0
-0.944743
0.438310
396
0
0
1
0
0
1
0
0
0
1
0
1
-0.489370
-0.507017
397
1
1
0
1
1
0
0
1
0
0
1
0
1.332126
0.782391
398
0
0
1
0
0
0
1
0
0
1
0
1
-0.641161
-0.497608
399
0
0
1
0
0
1
0
0
0
1
0
1
0.041900
-0.498356
400
0
0
0
1
0
0
1
1
0
0
1
0
-0.033996
2.317614
401
1
0
1
0
0
0
1
0
1
0
0
1
0.573169
-0.260605
402
0
1
1
0
1
0
0
1
0
0
1
0
-0.641161
0.427557
403
0
0
1
0
0
0
1
1
0
0
0
1
-1.020639
0.207130
404
1
0
0
1
1
0
0
1
0
0
0
1
0.952648
-0.140162
405
0
0
0
1
1
0
0
0
1
0
0
1
-0.792952
-0.388515
406
1
0
1
0
0
0
1
0
1
0
0
1
-0.565265
-0.448774
407
1
1
0
1
1
0
0
1
0
0
0
1
1.483917
3.153324
408
0
0
1
0
0
1
0
0
0
1
1
0
-0.800877
-0.498580
409
1
1
1
0
0
0
1
0
0
1
1
0
-2.083178
-0.390083
410
0
0
1
0
0
1
0
0
0
1
1
0
0.367934
-0.498056
411
1
0
0
1
0
1
0
1
0
0
1
0
0.497274
0.975936
412
0
0
1
0
0
0
1
0
0
1
1
0
-0.185787
-0.497608
413
0
0
1
0
0
0
1
0
0
1
0
1
0.019566
-0.492680
414
0
0
0
1
1
0
0
1
0
0
1
0
0.649065
1.314641
415
0
0
1
0
0
0
1
0
0
1
0
1
0.611117
-0.507017
416
0
0
1
0
0
0
1
0
0
1
0
1
0.019566
-0.492680
417
1
1
1
0
1
0
0
0
0
1
0
1
-0.353251
-0.236263
418 rows × 14 columns
In [94]:
result = pd.DataFrame({'PassengerId':data_test['PassengerId'].as_matrix(),'Survied':predictions.astype(np.int32)})
In [95]:
result
Out[95]:
PassengerId
Survied
0
892
0
1
893
0
2
894
0
3
895
0
4
896
1
5
897
0
6
898
1
7
899
0
8
900
1
9
901
0
10
902
0
11
903
0
12
904
1
13
905
0
14
906
1
15
907
1
16
908
0
17
909
0
18
910
1
19
911
1
20
912
0
21
913
0
22
914
1
23
915
0
24
916
1
25
917
0
26
918
1
27
919
0
28
920
0
29
921
0
...
...
...
388
1280
0
389
1281
0
390
1282
1
391
1283
1
392
1284
0
393
1285
0
394
1286
0
395
1287
1
396
1288
0
397
1289
1
398
1290
0
399
1291
0
400
1292
1
401
1293
0
402
1294
1
403
1295
0
404
1296
0
405
1297
1
406
1298
0
407
1299
0
408
1300
1
409
1301
1
410
1302
1
411
1303
1
412
1304
1
413
1305
0
414
1306
1
415
1307
0
416
1308
0
417
1309
0
418 rows × 2 columns
In [96]:
result.to_csv('result.csv')
In [ ]:
Content source: coolralf/KaggleTraining
Similar notebooks: