In [55]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

data_train = pd.read_csv("train.csv")
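A quick look at column dtypes and missing-value counts before any imputation can be helpful; the line below is a sketch rather than one of the executed cells.

data_train.info()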

In [56]:
data_train


Out[56]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S
... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 2 Giles, Mr. Frederick Edward male 21.0 1 0 28134 11.5000 NaN S
862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 0 17466 25.9292 D17 S
863 864 0 3 Sage, Miss. Dorothy Edith "Dolly" female NaN 8 2 CA. 2343 69.5500 NaN S
864 865 0 2 Gill, Mr. John William male 24.0 0 0 233866 13.0000 NaN S
865 866 1 2 Bystrom, Mrs. (Karolina) female 42.0 0 0 236852 13.0000 NaN S
866 867 1 2 Duran y More, Miss. Asuncion female 27.0 1 0 SC/PARIS 2149 13.8583 NaN C
867 868 0 1 Roebling, Mr. Washington Augustus II male 31.0 0 0 PC 17590 50.4958 A24 S
868 869 0 3 van Melkebeke, Mr. Philemon male NaN 0 0 345777 9.5000 NaN S
869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 NaN S
870 871 0 3 Balkic, Mr. Cerin male 26.0 0 0 349248 7.8958 NaN S
871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 D35 S
872 873 0 1 Carlsson, Mr. Frans Olof male 33.0 0 0 695 5.0000 B51 B53 B55 S
873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 NaN S
874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 NaN C
875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.0 0 0 2667 7.2250 NaN C
876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.0 0 0 7534 9.8458 NaN S
877 878 0 3 Petroff, Mr. Nedelio male 19.0 0 0 349212 7.8958 NaN S
878 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 NaN S
879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 C50 C
880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 1 230433 26.0000 NaN S
881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 NaN S
882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.0 0 0 7552 10.5167 NaN S
883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN S
884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S
885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns


In [57]:
from sklearn.ensemble import RandomForestRegressor

def set_missing_ages(df):
    # Use the numerical features to predict the missing Age values
    # with a random forest regressor.
    age_df = df[['Age', 'Fare', 'Parch', 'SibSp', 'Pclass']]

    known_age = age_df[age_df.Age.notnull()].values
    unknown_age = age_df[age_df.Age.isnull()].values

    # y is the known Age column, X the remaining features
    y = known_age[:, 0]
    X = known_age[:, 1:]

    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X, y)

    # Predict ages for the rows where Age is missing and write them back
    predictedAges = rfr.predict(unknown_age[:, 1:])
    df.loc[(df.Age.isnull()), 'Age'] = predictedAges

    return df, rfr

def set_Cabin_type(df):
    # Cabin is mostly missing, so reduce it to a Yes/No indicator
    df.loc[(df.Cabin.notnull()), 'Cabin'] = 'Yes'
    df.loc[(df.Cabin.isnull()), 'Cabin'] = 'No'
    return df

In [58]:
data_train, rfr = set_missing_ages(data_train)
data_train = set_Cabin_type(data_train)
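As a sanity check (a sketch, not an executed cell, assuming the rfr and data_train returned above): Age should now have no missing values, and the forest's feature importances show how much each predictor contributed to the age estimates.

print(data_train.Age.isnull().sum())   # expected: 0
print(rfr.feature_importances_)        # order: Fare, Parch, SibSp, Pclass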

In [59]:
data_train


Out[59]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.000000 1 0 A/5 21171 7.2500 No S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.000000 1 0 PC 17599 71.2833 Yes C
2 3 1 3 Heikkinen, Miss. Laina female 26.000000 0 0 STON/O2. 3101282 7.9250 No S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.000000 1 0 113803 53.1000 Yes S
4 5 0 3 Allen, Mr. William Henry male 35.000000 0 0 373450 8.0500 No S
5 6 0 3 Moran, Mr. James male 23.828953 0 0 330877 8.4583 No Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.000000 0 0 17463 51.8625 Yes S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.000000 3 1 349909 21.0750 No S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.000000 0 2 347742 11.1333 No S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.000000 1 0 237736 30.0708 No C
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.000000 1 1 PP 9549 16.7000 Yes S
11 12 1 1 Bonnell, Miss. Elizabeth female 58.000000 0 0 113783 26.5500 Yes S
12 13 0 3 Saundercock, Mr. William Henry male 20.000000 0 0 A/5. 2151 8.0500 No S
13 14 0 3 Andersson, Mr. Anders Johan male 39.000000 1 5 347082 31.2750 No S
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.000000 0 0 350406 7.8542 No S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.000000 0 0 248706 16.0000 No S
16 17 0 3 Rice, Master. Eugene male 2.000000 4 1 382652 29.1250 No Q
17 18 1 2 Williams, Mr. Charles Eugene male 32.066493 0 0 244373 13.0000 No S
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.000000 1 0 345763 18.0000 No S
19 20 1 3 Masselmani, Mrs. Fatima female 29.518205 0 0 2649 7.2250 No C
20 21 0 2 Fynney, Mr. Joseph J male 35.000000 0 0 239865 26.0000 No S
21 22 1 2 Beesley, Mr. Lawrence male 34.000000 0 0 248698 13.0000 Yes S
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.000000 0 0 330923 8.0292 No Q
23 24 1 1 Sloper, Mr. William Thompson male 28.000000 0 0 113788 35.5000 Yes S
24 25 0 3 Palsson, Miss. Torborg Danira female 8.000000 3 1 349909 21.0750 No S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.000000 1 5 347077 31.3875 No S
26 27 0 3 Emir, Mr. Farred Chehab male 29.518205 0 0 2631 7.2250 No C
27 28 0 1 Fortune, Mr. Charles Alexander male 19.000000 3 2 19950 263.0000 Yes S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female 22.380113 0 0 330959 7.8792 No Q
29 30 0 3 Todoroff, Mr. Lalio male 27.947206 0 0 349216 7.8958 No S
... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 2 Giles, Mr. Frederick Edward male 21.000000 1 0 28134 11.5000 No S
862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.000000 0 0 17466 25.9292 Yes S
863 864 0 3 Sage, Miss. Dorothy Edith "Dolly" female 10.869867 8 2 CA. 2343 69.5500 No S
864 865 0 2 Gill, Mr. John William male 24.000000 0 0 233866 13.0000 No S
865 866 1 2 Bystrom, Mrs. (Karolina) female 42.000000 0 0 236852 13.0000 No S
866 867 1 2 Duran y More, Miss. Asuncion female 27.000000 1 0 SC/PARIS 2149 13.8583 No C
867 868 0 1 Roebling, Mr. Washington Augustus II male 31.000000 0 0 PC 17590 50.4958 Yes S
868 869 0 3 van Melkebeke, Mr. Philemon male 25.977889 0 0 345777 9.5000 No S
869 870 1 3 Johnson, Master. Harold Theodor male 4.000000 1 1 347742 11.1333 No S
870 871 0 3 Balkic, Mr. Cerin male 26.000000 0 0 349248 7.8958 No S
871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.000000 1 1 11751 52.5542 Yes S
872 873 0 1 Carlsson, Mr. Frans Olof male 33.000000 0 0 695 5.0000 Yes S
873 874 0 3 Vander Cruyssen, Mr. Victor male 47.000000 0 0 345765 9.0000 No S
874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.000000 1 0 P/PP 3381 24.0000 No C
875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.000000 0 0 2667 7.2250 No C
876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.000000 0 0 7534 9.8458 No S
877 878 0 3 Petroff, Mr. Nedelio male 19.000000 0 0 349212 7.8958 No S
878 879 0 3 Laleff, Mr. Kristo male 27.947206 0 0 349217 7.8958 No S
879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.000000 0 1 11767 83.1583 Yes C
880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.000000 0 1 230433 26.0000 No S
881 882 0 3 Markun, Mr. Johann male 33.000000 0 0 349257 7.8958 No S
882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.000000 0 0 7552 10.5167 No S
883 884 0 2 Banfield, Mr. Frederick James male 28.000000 0 0 C.A./SOTON 34068 10.5000 No S
884 885 0 3 Sutehall, Mr. Henry Jr male 25.000000 0 0 SOTON/OQ 392076 7.0500 No S
885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.000000 0 5 382652 29.1250 No Q
886 887 0 2 Montvila, Rev. Juozas male 27.000000 0 0 211536 13.0000 No S
887 888 1 1 Graham, Miss. Margaret Edith female 19.000000 0 0 112053 30.0000 Yes S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female 16.127950 1 2 W./C. 6607 23.4500 No S
889 890 1 1 Behr, Mr. Karl Howell male 26.000000 0 0 111369 30.0000 Yes C
890 891 0 3 Dooley, Mr. Patrick male 32.000000 0 0 370376 7.7500 No Q

891 rows × 12 columns


In [60]:
# One-hot encode the categorical columns, then drop the originals
dummies_Cabin = pd.get_dummies(data_train['Cabin'], prefix='Cabin')
dummies_Embarked = pd.get_dummies(data_train['Embarked'], prefix='Embarked')
dummies_Sex = pd.get_dummies(data_train['Sex'], prefix='Sex')
dummies_Pclass = pd.get_dummies(data_train['Pclass'], prefix='Pclass')
df = pd.concat([data_train, dummies_Cabin, dummies_Embarked, dummies_Pclass, dummies_Sex], axis=1)
df.drop(['Cabin', 'Pclass', 'Sex', 'Name', 'Ticket', 'Embarked'], axis=1, inplace=True)
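pd.get_dummies expands a categorical column into one indicator column per category; a minimal standalone illustration (not taken from the original run; with the pandas version used in this notebook the indicators print as 0/1):

pd.get_dummies(pd.Series(['S', 'C', 'Q', 'S']), prefix='Embarked')
#    Embarked_C  Embarked_Q  Embarked_S
# 0           0           0           1
# 1           1           0           0
# 2           0           1           0
# 3           0           0           1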

In [61]:
df


Out[61]:
PassengerId Survived Age SibSp Parch Fare Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Sex_female Sex_male
0 1 0 22.000000 1 0 7.2500 1 0 0 0 1 0 0 1 0 1
1 2 1 38.000000 1 0 71.2833 0 1 1 0 0 1 0 0 1 0
2 3 1 26.000000 0 0 7.9250 1 0 0 0 1 0 0 1 1 0
3 4 1 35.000000 1 0 53.1000 0 1 0 0 1 1 0 0 1 0
4 5 0 35.000000 0 0 8.0500 1 0 0 0 1 0 0 1 0 1
5 6 0 23.828953 0 0 8.4583 1 0 0 1 0 0 0 1 0 1
6 7 0 54.000000 0 0 51.8625 0 1 0 0 1 1 0 0 0 1
7 8 0 2.000000 3 1 21.0750 1 0 0 0 1 0 0 1 0 1
8 9 1 27.000000 0 2 11.1333 1 0 0 0 1 0 0 1 1 0
9 10 1 14.000000 1 0 30.0708 1 0 1 0 0 0 1 0 1 0
10 11 1 4.000000 1 1 16.7000 0 1 0 0 1 0 0 1 1 0
11 12 1 58.000000 0 0 26.5500 0 1 0 0 1 1 0 0 1 0
12 13 0 20.000000 0 0 8.0500 1 0 0 0 1 0 0 1 0 1
13 14 0 39.000000 1 5 31.2750 1 0 0 0 1 0 0 1 0 1
14 15 0 14.000000 0 0 7.8542 1 0 0 0 1 0 0 1 1 0
15 16 1 55.000000 0 0 16.0000 1 0 0 0 1 0 1 0 1 0
16 17 0 2.000000 4 1 29.1250 1 0 0 1 0 0 0 1 0 1
17 18 1 32.066493 0 0 13.0000 1 0 0 0 1 0 1 0 0 1
18 19 0 31.000000 1 0 18.0000 1 0 0 0 1 0 0 1 1 0
19 20 1 29.518205 0 0 7.2250 1 0 1 0 0 0 0 1 1 0
20 21 0 35.000000 0 0 26.0000 1 0 0 0 1 0 1 0 0 1
21 22 1 34.000000 0 0 13.0000 0 1 0 0 1 0 1 0 0 1
22 23 1 15.000000 0 0 8.0292 1 0 0 1 0 0 0 1 1 0
23 24 1 28.000000 0 0 35.5000 0 1 0 0 1 1 0 0 0 1
24 25 0 8.000000 3 1 21.0750 1 0 0 0 1 0 0 1 1 0
25 26 1 38.000000 1 5 31.3875 1 0 0 0 1 0 0 1 1 0
26 27 0 29.518205 0 0 7.2250 1 0 1 0 0 0 0 1 0 1
27 28 0 19.000000 3 2 263.0000 0 1 0 0 1 1 0 0 0 1
28 29 1 22.380113 0 0 7.8792 1 0 0 1 0 0 0 1 1 0
29 30 0 27.947206 0 0 7.8958 1 0 0 0 1 0 0 1 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 21.000000 1 0 11.5000 1 0 0 0 1 0 1 0 0 1
862 863 1 48.000000 0 0 25.9292 0 1 0 0 1 1 0 0 1 0
863 864 0 10.869867 8 2 69.5500 1 0 0 0 1 0 0 1 1 0
864 865 0 24.000000 0 0 13.0000 1 0 0 0 1 0 1 0 0 1
865 866 1 42.000000 0 0 13.0000 1 0 0 0 1 0 1 0 1 0
866 867 1 27.000000 1 0 13.8583 1 0 1 0 0 0 1 0 1 0
867 868 0 31.000000 0 0 50.4958 0 1 0 0 1 1 0 0 0 1
868 869 0 25.977889 0 0 9.5000 1 0 0 0 1 0 0 1 0 1
869 870 1 4.000000 1 1 11.1333 1 0 0 0 1 0 0 1 0 1
870 871 0 26.000000 0 0 7.8958 1 0 0 0 1 0 0 1 0 1
871 872 1 47.000000 1 1 52.5542 0 1 0 0 1 1 0 0 1 0
872 873 0 33.000000 0 0 5.0000 0 1 0 0 1 1 0 0 0 1
873 874 0 47.000000 0 0 9.0000 1 0 0 0 1 0 0 1 0 1
874 875 1 28.000000 1 0 24.0000 1 0 1 0 0 0 1 0 1 0
875 876 1 15.000000 0 0 7.2250 1 0 1 0 0 0 0 1 1 0
876 877 0 20.000000 0 0 9.8458 1 0 0 0 1 0 0 1 0 1
877 878 0 19.000000 0 0 7.8958 1 0 0 0 1 0 0 1 0 1
878 879 0 27.947206 0 0 7.8958 1 0 0 0 1 0 0 1 0 1
879 880 1 56.000000 0 1 83.1583 0 1 1 0 0 1 0 0 1 0
880 881 1 25.000000 0 1 26.0000 1 0 0 0 1 0 1 0 1 0
881 882 0 33.000000 0 0 7.8958 1 0 0 0 1 0 0 1 0 1
882 883 0 22.000000 0 0 10.5167 1 0 0 0 1 0 0 1 1 0
883 884 0 28.000000 0 0 10.5000 1 0 0 0 1 0 1 0 0 1
884 885 0 25.000000 0 0 7.0500 1 0 0 0 1 0 0 1 0 1
885 886 0 39.000000 0 5 29.1250 1 0 0 1 0 0 0 1 1 0
886 887 0 27.000000 0 0 13.0000 1 0 0 0 1 0 1 0 0 1
887 888 1 19.000000 0 0 30.0000 0 1 0 0 1 1 0 0 1 0
888 889 0 16.127950 1 2 23.4500 1 0 0 0 1 0 0 1 1 0
889 890 1 26.000000 0 0 30.0000 0 1 1 0 0 1 0 0 0 1
890 891 0 32.000000 0 0 7.7500 1 0 0 1 0 0 0 1 0 1

891 rows × 16 columns


In [62]:
import sklearn.preprocessing as preprocessing

# Scale Age and Fare to zero mean / unit variance so they are on a scale
# comparable with the 0/1 dummy features.  StandardScaler expects 2-D input,
# hence the reshape(-1, 1).
scaler = preprocessing.StandardScaler()
age_scale_param = scaler.fit(df['Age'].values.reshape(-1, 1))
df['Age_scaled'] = scaler.fit_transform(df['Age'].values.reshape(-1, 1), age_scale_param)
fare_scale_param = scaler.fit(df['Fare'].values.reshape(-1, 1))
df['Fare_scaled'] = scaler.fit_transform(df['Fare'].values.reshape(-1, 1), fare_scale_param)
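A quick sanity check on the scaling (a sketch, not an executed cell): both new columns should have mean close to 0 and standard deviation close to 1.

print(df[['Age_scaled', 'Fare_scaled']].mean())
print(df[['Age_scaled', 'Fare_scaled']].std())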



In [63]:
df


Out[63]:
PassengerId Survived Age SibSp Parch Fare Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Sex_female Sex_male Age_scaled Fare_scaled
0 1 0 22.000000 1 0 7.2500 1 0 0 0 1 0 0 1 0 1 -0.561363 -0.502445
1 2 1 38.000000 1 0 71.2833 0 1 1 0 0 1 0 0 1 0 0.613182 0.786845
2 3 1 26.000000 0 0 7.9250 1 0 0 0 1 0 0 1 1 0 -0.267727 -0.488854
3 4 1 35.000000 1 0 53.1000 0 1 0 0 1 1 0 0 1 0 0.392955 0.420730
4 5 0 35.000000 0 0 8.0500 1 0 0 0 1 0 0 1 0 1 0.392955 -0.486337
5 6 0 23.828953 0 0 8.4583 1 0 0 1 0 0 0 1 0 1 -0.427102 -0.478116
6 7 0 54.000000 0 0 51.8625 0 1 0 0 1 1 0 0 0 1 1.787727 0.395814
7 8 0 2.000000 3 1 21.0750 1 0 0 0 1 0 0 1 0 1 -2.029545 -0.224083
8 9 1 27.000000 0 2 11.1333 1 0 0 0 1 0 0 1 1 0 -0.194318 -0.424256
9 10 1 14.000000 1 0 30.0708 1 0 1 0 0 0 1 0 1 0 -1.148636 -0.042956
10 11 1 4.000000 1 1 16.7000 0 1 0 0 1 0 0 1 1 0 -1.882726 -0.312172
11 12 1 58.000000 0 0 26.5500 0 1 0 0 1 1 0 0 1 0 2.081363 -0.113846
12 13 0 20.000000 0 0 8.0500 1 0 0 0 1 0 0 1 0 1 -0.708181 -0.486337
13 14 0 39.000000 1 5 31.2750 1 0 0 0 1 0 0 1 0 1 0.686591 -0.018709
14 15 0 14.000000 0 0 7.8542 1 0 0 0 1 0 0 1 1 0 -1.148636 -0.490280
15 16 1 55.000000 0 0 16.0000 1 0 0 0 1 0 1 0 1 0 1.861136 -0.326267
16 17 0 2.000000 4 1 29.1250 1 0 0 1 0 0 0 1 0 1 -2.029545 -0.061999
17 18 1 32.066493 0 0 13.0000 1 0 0 0 1 0 1 0 0 1 0.177609 -0.386671
18 19 0 31.000000 1 0 18.0000 1 0 0 0 1 0 0 1 1 0 0.099318 -0.285997
19 20 1 29.518205 0 0 7.2250 1 0 1 0 0 0 0 1 1 0 -0.009459 -0.502949
20 21 0 35.000000 0 0 26.0000 1 0 0 0 1 0 1 0 0 1 0.392955 -0.124920
21 22 1 34.000000 0 0 13.0000 0 1 0 0 1 0 1 0 0 1 0.319546 -0.386671
22 23 1 15.000000 0 0 8.0292 1 0 0 1 0 0 0 1 1 0 -1.075227 -0.486756
23 24 1 28.000000 0 0 35.5000 0 1 0 0 1 1 0 0 0 1 -0.120909 0.066360
24 25 0 8.000000 3 1 21.0750 1 0 0 0 1 0 0 1 1 0 -1.589090 -0.224083
25 26 1 38.000000 1 5 31.3875 1 0 0 0 1 0 0 1 1 0 0.613182 -0.016444
26 27 0 29.518205 0 0 7.2250 1 0 1 0 0 0 0 1 0 1 -0.009459 -0.502949
27 28 0 19.000000 3 2 263.0000 0 1 0 0 1 1 0 0 0 1 -0.781590 4.647001
28 29 1 22.380113 0 0 7.8792 1 0 0 1 0 0 0 1 1 0 -0.533459 -0.489776
29 30 0 27.947206 0 0 7.8958 1 0 0 0 1 0 0 1 0 1 -0.124784 -0.489442
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 21.000000 1 0 11.5000 1 0 0 0 1 0 1 0 0 1 -0.634772 -0.416873
862 863 1 48.000000 0 0 25.9292 0 1 0 0 1 1 0 0 1 0 1.347272 -0.126345
863 864 0 10.869867 8 2 69.5500 1 0 0 0 1 0 0 1 1 0 -1.378416 0.751946
864 865 0 24.000000 0 0 13.0000 1 0 0 0 1 0 1 0 0 1 -0.414545 -0.386671
865 866 1 42.000000 0 0 13.0000 1 0 0 0 1 0 1 0 1 0 0.906818 -0.386671
866 867 1 27.000000 1 0 13.8583 1 0 1 0 0 0 1 0 1 0 -0.194318 -0.369389
867 868 0 31.000000 0 0 50.4958 0 1 0 0 1 1 0 0 0 1 0.099318 0.368295
868 869 0 25.977889 0 0 9.5000 1 0 0 0 1 0 0 1 0 1 -0.269350 -0.457142
869 870 1 4.000000 1 1 11.1333 1 0 0 0 1 0 0 1 0 1 -1.882726 -0.424256
870 871 0 26.000000 0 0 7.8958 1 0 0 0 1 0 0 1 0 1 -0.267727 -0.489442
871 872 1 47.000000 1 1 52.5542 0 1 0 0 1 1 0 0 1 0 1.273863 0.409741
872 873 0 33.000000 0 0 5.0000 0 1 0 0 1 1 0 0 0 1 0.246136 -0.547748
873 874 0 47.000000 0 0 9.0000 1 0 0 0 1 0 0 1 0 1 1.273863 -0.467209
874 875 1 28.000000 1 0 24.0000 1 0 1 0 0 0 1 0 1 0 -0.120909 -0.165189
875 876 1 15.000000 0 0 7.2250 1 0 1 0 0 0 0 1 1 0 -1.075227 -0.502949
876 877 0 20.000000 0 0 9.8458 1 0 0 0 1 0 0 1 0 1 -0.708181 -0.450180
877 878 0 19.000000 0 0 7.8958 1 0 0 0 1 0 0 1 0 1 -0.781590 -0.489442
878 879 0 27.947206 0 0 7.8958 1 0 0 0 1 0 0 1 0 1 -0.124784 -0.489442
879 880 1 56.000000 0 1 83.1583 0 1 1 0 0 1 0 0 1 0 1.934545 1.025945
880 881 1 25.000000 0 1 26.0000 1 0 0 0 1 0 1 0 1 0 -0.341136 -0.124920
881 882 0 33.000000 0 0 7.8958 1 0 0 0 1 0 0 1 0 1 0.246136 -0.489442
882 883 0 22.000000 0 0 10.5167 1 0 0 0 1 0 0 1 1 0 -0.561363 -0.436671
883 884 0 28.000000 0 0 10.5000 1 0 0 0 1 0 1 0 0 1 -0.120909 -0.437007
884 885 0 25.000000 0 0 7.0500 1 0 0 0 1 0 0 1 0 1 -0.341136 -0.506472
885 886 0 39.000000 0 5 29.1250 1 0 0 1 0 0 0 1 1 0 0.686591 -0.061999
886 887 0 27.000000 0 0 13.0000 1 0 0 0 1 0 1 0 0 1 -0.194318 -0.386671
887 888 1 19.000000 0 0 30.0000 0 1 0 0 1 1 0 0 1 0 -0.781590 -0.044381
888 889 0 16.127950 1 2 23.4500 1 0 0 0 1 0 0 1 1 0 -0.992425 -0.176263
889 890 1 26.000000 0 0 30.0000 0 1 1 0 0 1 0 0 0 1 -0.267727 -0.044381
890 891 0 32.000000 0 0 7.7500 1 0 0 1 0 0 0 1 0 1 0.172727 -0.492378

891 rows × 18 columns


In [85]:
from sklearn import linear_model

# Keep Survived plus the engineered feature columns
train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
train_np = train_df.values

# First column is the label, the rest are features
y = train_np[:, 0]
X = train_np[:, 1:]

# L1-regularised logistic regression (liblinear supports the l1 penalty)
clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6, solver='liblinear')
clf.fit(X, y)


Out[85]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=1e-06,
          verbose=0, warm_start=False)
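To see which features the fitted model pushes towards or away from survival, the coefficients can be paired with the feature names (a sketch assuming the clf and train_df defined above):

pd.DataFrame({'feature': train_df.columns[1:], 'coef': clf.coef_[0]})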

In [87]:
train_df


Out[87]:
Survived SibSp Parch Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Sex_female Sex_male Age_scaled Fare_scaled
0 0 1 0 1 0 0 0 1 0 0 1 0 1 -0.561363 -0.502445
1 1 1 0 0 1 1 0 0 1 0 0 1 0 0.613182 0.786845
2 1 0 0 1 0 0 0 1 0 0 1 1 0 -0.267727 -0.488854
3 1 1 0 0 1 0 0 1 1 0 0 1 0 0.392955 0.420730
4 0 0 0 1 0 0 0 1 0 0 1 0 1 0.392955 -0.486337
5 0 0 0 1 0 0 1 0 0 0 1 0 1 -0.427102 -0.478116
6 0 0 0 0 1 0 0 1 1 0 0 0 1 1.787727 0.395814
7 0 3 1 1 0 0 0 1 0 0 1 0 1 -2.029545 -0.224083
8 1 0 2 1 0 0 0 1 0 0 1 1 0 -0.194318 -0.424256
9 1 1 0 1 0 1 0 0 0 1 0 1 0 -1.148636 -0.042956
10 1 1 1 0 1 0 0 1 0 0 1 1 0 -1.882726 -0.312172
11 1 0 0 0 1 0 0 1 1 0 0 1 0 2.081363 -0.113846
12 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.708181 -0.486337
13 0 1 5 1 0 0 0 1 0 0 1 0 1 0.686591 -0.018709
14 0 0 0 1 0 0 0 1 0 0 1 1 0 -1.148636 -0.490280
15 1 0 0 1 0 0 0 1 0 1 0 1 0 1.861136 -0.326267
16 0 4 1 1 0 0 1 0 0 0 1 0 1 -2.029545 -0.061999
17 1 0 0 1 0 0 0 1 0 1 0 0 1 0.177609 -0.386671
18 0 1 0 1 0 0 0 1 0 0 1 1 0 0.099318 -0.285997
19 1 0 0 1 0 1 0 0 0 0 1 1 0 -0.009459 -0.502949
20 0 0 0 1 0 0 0 1 0 1 0 0 1 0.392955 -0.124920
21 1 0 0 0 1 0 0 1 0 1 0 0 1 0.319546 -0.386671
22 1 0 0 1 0 0 1 0 0 0 1 1 0 -1.075227 -0.486756
23 1 0 0 0 1 0 0 1 1 0 0 0 1 -0.120909 0.066360
24 0 3 1 1 0 0 0 1 0 0 1 1 0 -1.589090 -0.224083
25 1 1 5 1 0 0 0 1 0 0 1 1 0 0.613182 -0.016444
26 0 0 0 1 0 1 0 0 0 0 1 0 1 -0.009459 -0.502949
27 0 3 2 0 1 0 0 1 1 0 0 0 1 -0.781590 4.647001
28 1 0 0 1 0 0 1 0 0 0 1 1 0 -0.533459 -0.489776
29 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.124784 -0.489442
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
861 0 1 0 1 0 0 0 1 0 1 0 0 1 -0.634772 -0.416873
862 1 0 0 0 1 0 0 1 1 0 0 1 0 1.347272 -0.126345
863 0 8 2 1 0 0 0 1 0 0 1 1 0 -1.378416 0.751946
864 0 0 0 1 0 0 0 1 0 1 0 0 1 -0.414545 -0.386671
865 1 0 0 1 0 0 0 1 0 1 0 1 0 0.906818 -0.386671
866 1 1 0 1 0 1 0 0 0 1 0 1 0 -0.194318 -0.369389
867 0 0 0 0 1 0 0 1 1 0 0 0 1 0.099318 0.368295
868 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.269350 -0.457142
869 1 1 1 1 0 0 0 1 0 0 1 0 1 -1.882726 -0.424256
870 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.267727 -0.489442
871 1 1 1 0 1 0 0 1 1 0 0 1 0 1.273863 0.409741
872 0 0 0 0 1 0 0 1 1 0 0 0 1 0.246136 -0.547748
873 0 0 0 1 0 0 0 1 0 0 1 0 1 1.273863 -0.467209
874 1 1 0 1 0 1 0 0 0 1 0 1 0 -0.120909 -0.165189
875 1 0 0 1 0 1 0 0 0 0 1 1 0 -1.075227 -0.502949
876 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.708181 -0.450180
877 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.781590 -0.489442
878 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.124784 -0.489442
879 1 0 1 0 1 1 0 0 1 0 0 1 0 1.934545 1.025945
880 1 0 1 1 0 0 0 1 0 1 0 1 0 -0.341136 -0.124920
881 0 0 0 1 0 0 0 1 0 0 1 0 1 0.246136 -0.489442
882 0 0 0 1 0 0 0 1 0 0 1 1 0 -0.561363 -0.436671
883 0 0 0 1 0 0 0 1 0 1 0 0 1 -0.120909 -0.437007
884 0 0 0 1 0 0 0 1 0 0 1 0 1 -0.341136 -0.506472
885 0 0 5 1 0 0 1 0 0 0 1 1 0 0.686591 -0.061999
886 0 0 0 1 0 0 0 1 0 1 0 0 1 -0.194318 -0.386671
887 1 0 0 0 1 0 0 1 1 0 0 1 0 -0.781590 -0.044381
888 0 1 2 1 0 0 0 1 0 0 1 1 0 -0.992425 -0.176263
889 1 0 0 0 1 1 0 0 1 0 0 0 1 -0.267727 -0.044381
890 0 0 0 1 0 0 1 0 0 0 1 0 1 0.172727 -0.492378

891 rows × 15 columns


In [88]:
data_test = pd.read_csv("test.csv")
# The test set has one missing Fare; fill it with 0 so the model can score that row
data_test.loc[(data_test.Fare.isnull()), 'Fare'] = 0

# Impute missing Ages with the random forest fitted on the training data
tmp_df = data_test[['Age', 'Fare', 'Parch', 'SibSp', 'Pclass']]
null_age = tmp_df[data_test.Age.isnull()].values

X = null_age[:, 1:]
predictedAges = rfr.predict(X)
data_test.loc[(data_test.Age.isnull()), 'Age'] = predictedAges

# Apply the same Cabin and one-hot transformations as for the training set
data_test = set_Cabin_type(data_test)
dummies_Cabin = pd.get_dummies(data_test['Cabin'], prefix='Cabin')
dummies_Embarked = pd.get_dummies(data_test['Embarked'], prefix='Embarked')
dummies_Sex = pd.get_dummies(data_test['Sex'], prefix='Sex')
dummies_Pclass = pd.get_dummies(data_test['Pclass'], prefix='Pclass')

df_test = pd.concat([data_test, dummies_Cabin, dummies_Embarked, dummies_Pclass, dummies_Sex], axis=1)
df_test.drop(['Cabin', 'Pclass', 'Sex', 'Name', 'Ticket', 'Embarked'], axis=1, inplace=True)
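Before scaling and predicting, a quick check (a sketch, not an executed cell) that no missing values remain in the prepared test frame:

df_test.isnull().sum()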

In [89]:
df_test


Out[89]:
PassengerId Age SibSp Parch Fare Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Sex_female Sex_male
0 892 34.500000 0 0 7.8292 1 0 0 1 0 0 0 1 0 1
1 893 47.000000 1 0 7.0000 1 0 0 0 1 0 0 1 1 0
2 894 62.000000 0 0 9.6875 1 0 0 1 0 0 1 0 0 1
3 895 27.000000 0 0 8.6625 1 0 0 0 1 0 0 1 0 1
4 896 22.000000 1 1 12.2875 1 0 0 0 1 0 0 1 1 0
5 897 14.000000 0 0 9.2250 1 0 0 0 1 0 0 1 0 1
6 898 30.000000 0 0 7.6292 1 0 0 1 0 0 0 1 1 0
7 899 26.000000 1 1 29.0000 1 0 0 0 1 0 1 0 0 1
8 900 18.000000 0 0 7.2292 1 0 1 0 0 0 0 1 1 0
9 901 21.000000 2 0 24.1500 1 0 0 0 1 0 0 1 0 1
10 902 27.947206 0 0 7.8958 1 0 0 0 1 0 0 1 0 1
11 903 46.000000 0 0 26.0000 1 0 0 0 1 1 0 0 0 1
12 904 23.000000 1 0 82.2667 0 1 0 0 1 1 0 0 1 0
13 905 63.000000 1 0 26.0000 1 0 0 0 1 0 1 0 0 1
14 906 47.000000 1 0 61.1750 0 1 0 0 1 1 0 0 1 0
15 907 24.000000 1 0 27.7208 1 0 1 0 0 0 1 0 1 0
16 908 35.000000 0 0 12.3500 1 0 0 1 0 0 1 0 0 1
17 909 21.000000 0 0 7.2250 1 0 1 0 0 0 0 1 0 1
18 910 27.000000 1 0 7.9250 1 0 0 0 1 0 0 1 1 0
19 911 45.000000 0 0 7.2250 1 0 1 0 0 0 0 1 1 0
20 912 55.000000 1 0 59.4000 1 0 1 0 0 1 0 0 0 1
21 913 9.000000 0 1 3.1708 1 0 0 0 1 0 0 1 0 1
22 914 52.314311 0 0 31.6833 1 0 0 0 1 1 0 0 1 0
23 915 21.000000 0 1 61.3792 1 0 1 0 0 1 0 0 0 1
24 916 48.000000 1 3 262.3750 0 1 1 0 0 1 0 0 1 0
25 917 50.000000 1 0 14.5000 1 0 0 0 1 0 0 1 0 1
26 918 22.000000 0 1 61.9792 0 1 1 0 0 1 0 0 1 0
27 919 22.500000 0 0 7.2250 1 0 1 0 0 0 0 1 0 1
28 920 41.000000 0 0 30.5000 0 1 0 0 1 1 0 0 0 1
29 921 23.458621 2 0 21.6792 1 0 1 0 0 0 0 1 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
388 1280 21.000000 0 0 7.7500 1 0 0 1 0 0 0 1 0 1
389 1281 6.000000 3 1 21.0750 1 0 0 0 1 0 0 1 0 1
390 1282 23.000000 0 0 93.5000 0 1 0 0 1 1 0 0 0 1
391 1283 51.000000 0 1 39.4000 0 1 0 0 1 1 0 0 1 0
392 1284 13.000000 0 2 20.2500 1 0 0 0 1 0 0 1 0 1
393 1285 47.000000 0 0 10.5000 1 0 0 0 1 0 1 0 0 1
394 1286 29.000000 3 1 22.0250 1 0 0 0 1 0 0 1 0 1
395 1287 18.000000 1 0 60.0000 0 1 0 0 1 1 0 0 1 0
396 1288 24.000000 0 0 7.2500 1 0 0 1 0 0 0 1 0 1
397 1289 48.000000 1 1 79.2000 0 1 1 0 0 1 0 0 1 0
398 1290 22.000000 0 0 7.7750 1 0 0 0 1 0 0 1 0 1
399 1291 31.000000 0 0 7.7333 1 0 0 1 0 0 0 1 0 1
400 1292 30.000000 0 0 164.8667 0 1 0 0 1 1 0 0 1 0
401 1293 38.000000 1 0 21.0000 1 0 0 0 1 0 1 0 0 1
402 1294 22.000000 0 1 59.4000 1 0 1 0 0 1 0 0 1 0
403 1295 17.000000 0 0 47.1000 1 0 0 0 1 1 0 0 0 1
404 1296 43.000000 1 0 27.7208 0 1 1 0 0 1 0 0 0 1
405 1297 20.000000 0 0 13.8625 0 1 1 0 0 0 1 0 0 1
406 1298 23.000000 1 0 10.5000 1 0 0 0 1 0 1 0 0 1
407 1299 50.000000 1 1 211.5000 0 1 1 0 0 1 0 0 0 1
408 1300 19.895581 0 0 7.7208 1 0 0 1 0 0 0 1 1 0
409 1301 3.000000 1 1 13.7750 1 0 0 0 1 0 0 1 1 0
410 1302 35.295824 0 0 7.7500 1 0 0 1 0 0 0 1 1 0
411 1303 37.000000 1 0 90.0000 0 1 0 1 0 1 0 0 1 0
412 1304 28.000000 0 0 7.7750 1 0 0 0 1 0 0 1 1 0
413 1305 30.705727 0 0 8.0500 1 0 0 0 1 0 0 1 0 1
414 1306 39.000000 0 0 108.9000 0 1 1 0 0 1 0 0 1 0
415 1307 38.500000 0 0 7.2500 1 0 0 0 1 0 0 1 0 1
416 1308 30.705727 0 0 8.0500 1 0 0 0 1 0 0 1 0 1
417 1309 25.793502 1 1 22.3583 1 0 1 0 0 0 0 1 0 1

418 rows × 15 columns


In [90]:
# Scale the test-set Age and Fare.  Note: fit_transform ignores its second
# argument here, so the scaler is in fact re-fitted on the test data rather
# than re-using the training-set parameters.
df_test['Age_scaled'] = scaler.fit_transform(df_test['Age'].values.reshape(-1, 1), age_scale_param)
df_test['Fare_scaled'] = scaler.fit_transform(df_test['Fare'].values.reshape(-1, 1), fare_scale_param)



In [91]:
df_test


Out[91]:
PassengerId Age SibSp Parch Fare Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Sex_female Sex_male Age_scaled Fare_scaled
0 892 34.500000 0 0 7.8292 1 0 0 1 0 0 0 1 0 1 0.307535 -0.496637
1 893 47.000000 1 0 7.0000 1 0 0 0 1 0 0 1 1 0 1.256230 -0.511497
2 894 62.000000 0 0 9.6875 1 0 0 1 0 0 1 0 0 1 2.394665 -0.463335
3 895 27.000000 0 0 8.6625 1 0 0 0 1 0 0 1 0 1 -0.261683 -0.481704
4 896 22.000000 1 1 12.2875 1 0 0 0 1 0 0 1 1 0 -0.641161 -0.416740
5 897 14.000000 0 0 9.2250 1 0 0 0 1 0 0 1 0 1 -1.248326 -0.471623
6 898 30.000000 0 0 7.6292 1 0 0 1 0 0 0 1 1 0 -0.033996 -0.500221
7 899 26.000000 1 1 29.0000 1 0 0 0 1 0 1 0 0 1 -0.337578 -0.117238
8 900 18.000000 0 0 7.2292 1 0 1 0 0 0 0 1 1 0 -0.944743 -0.507390
9 901 21.000000 2 0 24.1500 1 0 0 0 1 0 0 1 0 1 -0.717056 -0.204154
10 902 27.947206 0 0 7.8958 1 0 0 0 1 0 0 1 0 1 -0.189794 -0.495444
11 903 46.000000 0 0 26.0000 1 0 0 0 1 1 0 0 0 1 1.180334 -0.171000
12 904 23.000000 1 0 82.2667 0 1 0 0 1 1 0 0 1 0 -0.565265 0.837349
13 905 63.000000 1 0 26.0000 1 0 0 0 1 0 1 0 0 1 2.470560 -0.171000
14 906 47.000000 1 0 61.1750 0 1 0 0 1 1 0 0 1 0 1.256230 0.459367
15 907 24.000000 1 0 27.7208 1 0 1 0 0 0 1 0 1 0 -0.489370 -0.140162
16 908 35.000000 0 0 12.3500 1 0 0 1 0 0 1 0 0 1 0.345482 -0.415620
17 909 21.000000 0 0 7.2250 1 0 1 0 0 0 0 1 0 1 -0.717056 -0.507465
18 910 27.000000 1 0 7.9250 1 0 0 0 1 0 0 1 1 0 -0.261683 -0.494920
19 911 45.000000 0 0 7.2250 1 0 1 0 0 0 0 1 1 0 1.104439 -0.507465
20 912 55.000000 1 0 59.4000 1 0 1 0 0 1 0 0 0 1 1.863395 0.427557
21 913 9.000000 0 1 3.1708 1 0 0 0 1 0 0 1 0 1 -1.627804 -0.580120
22 914 52.314311 0 0 31.6833 1 0 0 0 1 1 0 0 1 0 1.659563 -0.069151
23 915 21.000000 0 1 61.3792 1 0 1 0 0 1 0 0 0 1 -0.717056 0.463026
24 916 48.000000 1 3 262.3750 0 1 1 0 0 1 0 0 1 0 1.332126 4.065049
25 917 50.000000 1 0 14.5000 1 0 0 0 1 0 0 1 0 1 1.483917 -0.377090
26 918 22.000000 0 1 61.9792 0 1 1 0 0 1 0 0 1 0 -0.641161 0.473779
27 919 22.500000 0 0 7.2250 1 0 1 0 0 0 0 1 0 1 -0.603213 -0.507465
28 920 41.000000 0 0 30.5000 0 1 0 0 1 1 0 0 0 1 0.800856 -0.090356
29 921 23.458621 2 0 21.6792 1 0 1 0 0 0 0 1 0 1 -0.530458 -0.248433
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
388 1280 21.000000 0 0 7.7500 1 0 0 1 0 0 0 1 0 1 -0.717056 -0.498056
389 1281 6.000000 3 1 21.0750 1 0 0 0 1 0 0 1 0 1 -1.855491 -0.259261
390 1282 23.000000 0 0 93.5000 0 1 0 0 1 1 0 0 0 1 -0.565265 1.038659
391 1283 51.000000 0 1 39.4000 0 1 0 0 1 1 0 0 1 0 1.559813 0.069140
392 1284 13.000000 0 2 20.2500 1 0 0 0 1 0 0 1 0 1 -1.324222 -0.274045
393 1285 47.000000 0 0 10.5000 1 0 0 0 1 0 1 0 0 1 1.256230 -0.448774
394 1286 29.000000 3 1 22.0250 1 0 0 0 1 0 0 1 0 1 -0.109891 -0.242236
395 1287 18.000000 1 0 60.0000 0 1 0 0 1 1 0 0 1 0 -0.944743 0.438310
396 1288 24.000000 0 0 7.2500 1 0 0 1 0 0 0 1 0 1 -0.489370 -0.507017
397 1289 48.000000 1 1 79.2000 0 1 1 0 0 1 0 0 1 0 1.332126 0.782391
398 1290 22.000000 0 0 7.7750 1 0 0 0 1 0 0 1 0 1 -0.641161 -0.497608
399 1291 31.000000 0 0 7.7333 1 0 0 1 0 0 0 1 0 1 0.041900 -0.498356
400 1292 30.000000 0 0 164.8667 0 1 0 0 1 1 0 0 1 0 -0.033996 2.317614
401 1293 38.000000 1 0 21.0000 1 0 0 0 1 0 1 0 0 1 0.573169 -0.260605
402 1294 22.000000 0 1 59.4000 1 0 1 0 0 1 0 0 1 0 -0.641161 0.427557
403 1295 17.000000 0 0 47.1000 1 0 0 0 1 1 0 0 0 1 -1.020639 0.207130
404 1296 43.000000 1 0 27.7208 0 1 1 0 0 1 0 0 0 1 0.952648 -0.140162
405 1297 20.000000 0 0 13.8625 0 1 1 0 0 0 1 0 0 1 -0.792952 -0.388515
406 1298 23.000000 1 0 10.5000 1 0 0 0 1 0 1 0 0 1 -0.565265 -0.448774
407 1299 50.000000 1 1 211.5000 0 1 1 0 0 1 0 0 0 1 1.483917 3.153324
408 1300 19.895581 0 0 7.7208 1 0 0 1 0 0 0 1 1 0 -0.800877 -0.498580
409 1301 3.000000 1 1 13.7750 1 0 0 0 1 0 0 1 1 0 -2.083178 -0.390083
410 1302 35.295824 0 0 7.7500 1 0 0 1 0 0 0 1 1 0 0.367934 -0.498056
411 1303 37.000000 1 0 90.0000 0 1 0 1 0 1 0 0 1 0 0.497274 0.975936
412 1304 28.000000 0 0 7.7750 1 0 0 0 1 0 0 1 1 0 -0.185787 -0.497608
413 1305 30.705727 0 0 8.0500 1 0 0 0 1 0 0 1 0 1 0.019566 -0.492680
414 1306 39.000000 0 0 108.9000 0 1 1 0 0 1 0 0 1 0 0.649065 1.314641
415 1307 38.500000 0 0 7.2500 1 0 0 0 1 0 0 1 0 1 0.611117 -0.507017
416 1308 30.705727 0 0 8.0500 1 0 0 0 1 0 0 1 0 1 0.019566 -0.492680
417 1309 25.793502 1 1 22.3583 1 0 1 0 0 0 0 1 0 1 -0.353251 -0.236263

418 rows × 17 columns


In [92]:
# Select the same feature columns as in training ('Survived' simply does not
# exist in the test frame, so the regex yields only the 14 feature columns)
test = df_test.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
predictions = clf.predict(test)
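A quick look at the class balance of the predictions (a sketch, not an executed cell):

pd.Series(predictions).value_counts()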

In [93]:
test


Out[93]:
SibSp Parch Cabin_No Cabin_Yes Embarked_C Embarked_Q Embarked_S Pclass_1 Pclass_2 Pclass_3 Sex_female Sex_male Age_scaled Fare_scaled
0 0 0 1 0 0 1 0 0 0 1 0 1 0.307535 -0.496637
1 1 0 1 0 0 0 1 0 0 1 1 0 1.256230 -0.511497
2 0 0 1 0 0 1 0 0 1 0 0 1 2.394665 -0.463335
3 0 0 1 0 0 0 1 0 0 1 0 1 -0.261683 -0.481704
4 1 1 1 0 0 0 1 0 0 1 1 0 -0.641161 -0.416740
5 0 0 1 0 0 0 1 0 0 1 0 1 -1.248326 -0.471623
6 0 0 1 0 0 1 0 0 0 1 1 0 -0.033996 -0.500221
7 1 1 1 0 0 0 1 0 1 0 0 1 -0.337578 -0.117238
8 0 0 1 0 1 0 0 0 0 1 1 0 -0.944743 -0.507390
9 2 0 1 0 0 0 1 0 0 1 0 1 -0.717056 -0.204154
10 0 0 1 0 0 0 1 0 0 1 0 1 -0.189794 -0.495444
11 0 0 1 0 0 0 1 1 0 0 0 1 1.180334 -0.171000
12 1 0 0 1 0 0 1 1 0 0 1 0 -0.565265 0.837349
13 1 0 1 0 0 0 1 0 1 0 0 1 2.470560 -0.171000
14 1 0 0 1 0 0 1 1 0 0 1 0 1.256230 0.459367
15 1 0 1 0 1 0 0 0 1 0 1 0 -0.489370 -0.140162
16 0 0 1 0 0 1 0 0 1 0 0 1 0.345482 -0.415620
17 0 0 1 0 1 0 0 0 0 1 0 1 -0.717056 -0.507465
18 1 0 1 0 0 0 1 0 0 1 1 0 -0.261683 -0.494920
19 0 0 1 0 1 0 0 0 0 1 1 0 1.104439 -0.507465
20 1 0 1 0 1 0 0 1 0 0 0 1 1.863395 0.427557
21 0 1 1 0 0 0 1 0 0 1 0 1 -1.627804 -0.580120
22 0 0 1 0 0 0 1 1 0 0 1 0 1.659563 -0.069151
23 0 1 1 0 1 0 0 1 0 0 0 1 -0.717056 0.463026
24 1 3 0 1 1 0 0 1 0 0 1 0 1.332126 4.065049
25 1 0 1 0 0 0 1 0 0 1 0 1 1.483917 -0.377090
26 0 1 0 1 1 0 0 1 0 0 1 0 -0.641161 0.473779
27 0 0 1 0 1 0 0 0 0 1 0 1 -0.603213 -0.507465
28 0 0 0 1 0 0 1 1 0 0 0 1 0.800856 -0.090356
29 2 0 1 0 1 0 0 0 0 1 0 1 -0.530458 -0.248433
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
388 0 0 1 0 0 1 0 0 0 1 0 1 -0.717056 -0.498056
389 3 1 1 0 0 0 1 0 0 1 0 1 -1.855491 -0.259261
390 0 0 0 1 0 0 1 1 0 0 0 1 -0.565265 1.038659
391 0 1 0 1 0 0 1 1 0 0 1 0 1.559813 0.069140
392 0 2 1 0 0 0 1 0 0 1 0 1 -1.324222 -0.274045
393 0 0 1 0 0 0 1 0 1 0 0 1 1.256230 -0.448774
394 3 1 1 0 0 0 1 0 0 1 0 1 -0.109891 -0.242236
395 1 0 0 1 0 0 1 1 0 0 1 0 -0.944743 0.438310
396 0 0 1 0 0 1 0 0 0 1 0 1 -0.489370 -0.507017
397 1 1 0 1 1 0 0 1 0 0 1 0 1.332126 0.782391
398 0 0 1 0 0 0 1 0 0 1 0 1 -0.641161 -0.497608
399 0 0 1 0 0 1 0 0 0 1 0 1 0.041900 -0.498356
400 0 0 0 1 0 0 1 1 0 0 1 0 -0.033996 2.317614
401 1 0 1 0 0 0 1 0 1 0 0 1 0.573169 -0.260605
402 0 1 1 0 1 0 0 1 0 0 1 0 -0.641161 0.427557
403 0 0 1 0 0 0 1 1 0 0 0 1 -1.020639 0.207130
404 1 0 0 1 1 0 0 1 0 0 0 1 0.952648 -0.140162
405 0 0 0 1 1 0 0 0 1 0 0 1 -0.792952 -0.388515
406 1 0 1 0 0 0 1 0 1 0 0 1 -0.565265 -0.448774
407 1 1 0 1 1 0 0 1 0 0 0 1 1.483917 3.153324
408 0 0 1 0 0 1 0 0 0 1 1 0 -0.800877 -0.498580
409 1 1 1 0 0 0 1 0 0 1 1 0 -2.083178 -0.390083
410 0 0 1 0 0 1 0 0 0 1 1 0 0.367934 -0.498056
411 1 0 0 1 0 1 0 1 0 0 1 0 0.497274 0.975936
412 0 0 1 0 0 0 1 0 0 1 1 0 -0.185787 -0.497608
413 0 0 1 0 0 0 1 0 0 1 0 1 0.019566 -0.492680
414 0 0 0 1 1 0 0 1 0 0 1 0 0.649065 1.314641
415 0 0 1 0 0 0 1 0 0 1 0 1 0.611117 -0.507017
416 0 0 1 0 0 0 1 0 0 1 0 1 0.019566 -0.492680
417 1 1 1 0 1 0 0 0 0 1 0 1 -0.353251 -0.236263

418 rows × 14 columns


In [94]:
# Assemble the submission frame: PassengerId plus the predicted Survived flag
result = pd.DataFrame({'PassengerId': data_test['PassengerId'].values, 'Survived': predictions.astype(np.int32)})

In [95]:
result


Out[95]:
PassengerId Survived
0 892 0
1 893 0
2 894 0
3 895 0
4 896 1
5 897 0
6 898 1
7 899 0
8 900 1
9 901 0
10 902 0
11 903 0
12 904 1
13 905 0
14 906 1
15 907 1
16 908 0
17 909 0
18 910 1
19 911 1
20 912 0
21 913 0
22 914 1
23 915 0
24 916 1
25 917 0
26 918 1
27 919 0
28 920 0
29 921 0
... ... ...
388 1280 0
389 1281 0
390 1282 1
391 1283 1
392 1284 0
393 1285 0
394 1286 0
395 1287 1
396 1288 0
397 1289 1
398 1290 0
399 1291 0
400 1292 1
401 1293 0
402 1294 1
403 1295 0
404 1296 0
405 1297 1
406 1298 0
407 1299 0
408 1300 1
409 1301 1
410 1302 1
411 1303 1
412 1304 1
413 1305 0
414 1306 1
415 1307 0
416 1308 0
417 1309 0

418 rows × 2 columns


In [96]:
# Write the submission; index=False keeps the file to the two expected columns
result.to_csv('result.csv', index=False)
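A sketch (not an executed cell) to verify the written submission has the expected two columns and 418 rows:

check = pd.read_csv('result.csv')
print(check.shape)
print(check.head())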

In [ ]: