Data explore



In [262]:

    
import pandas as pd #数据分析
import numpy as np #科学计算
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
%matplotlib inline
data_train = pd.read_csv("./input/titanic/train.csv")
data_train.columns









    Out[262]:





Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')



In [263]:

    
data_train.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB



In [264]:

    
data_train.describe()









    Out[264]:







  
    
      
      PassengerId
      Survived
      Pclass
      Age
      SibSp
      Parch
      Fare
    
  
  
    
      count
      891.000000
      891.000000
      891.000000
      714.000000
      891.000000
      891.000000
      891.000000
    
    
      mean
      446.000000
      0.383838
      2.308642
      29.699118
      0.523008
      0.381594
      32.204208
    
    
      std
      257.353842
      0.486592
      0.836071
      14.526497
      1.102743
      0.806057
      49.693429
    
    
      min
      1.000000
      0.000000
      1.000000
      0.420000
      0.000000
      0.000000
      0.000000
    
    
      25%
      223.500000
      0.000000
      2.000000
      20.125000
      0.000000
      0.000000
      7.910400
    
    
      50%
      446.000000
      0.000000
      3.000000
      28.000000
      0.000000
      0.000000
      14.454200
    
    
      75%
      668.500000
      1.000000
      3.000000
      38.000000
      1.000000
      0.000000
      31.000000
    
    
      max
      891.000000
      1.000000
      3.000000
      80.000000
      8.000000
      6.000000
      512.329200



In [265]:

    
fig=plt.figure()
fig.set_alpha(0.5)
plt.subplot2grid((2,3),(0,0))
data_train.Survived.value_counts().plot(kind = 'bar')
plt.title(u'获救人数1为获救')
plt.ylabel(u'人数')
plt.subplot2grid((2,3),(0,1))
data_train.Pclass.value_counts().plot(kind='bar')
plt.title(u'乘客等级分布')
plt.ylabel(u'人数')
plt.subplot2grid((2,3),(0,2))
plt.scatter(data_train.Survived, data_train.Age)
plt.ylabel(u"年龄")                         # sets the y axis lable
plt.grid(b=True, which='major', axis='y') # formats the grid line style of our graphs
plt.title(u"按年龄看获救分布 (1为获救)")
plt.subplot2grid((2,3),(1,0), colspan=2)
data_train.Age[data_train.Pclass == 1].plot(kind='kde')   # plots a kernel desnsity estimate of the subset of the 1st class passanges's age
data_train.Age[data_train.Pclass == 2].plot(kind='kde')
data_train.Age[data_train.Pclass == 3].plot(kind='kde')
plt.xlabel(u"年龄")# plots an axis lable
plt.ylabel(u"密度") 
plt.title(u"各等级的乘客年龄分布")
plt.legend((u'头等舱', u'2等舱',u'3等舱'),loc='best') # sets our legend for our graph.

plt.subplot2grid((2,3),(1,2))
data_train.Embarked.value_counts().plot(kind='bar')
plt.title(u"各登船口岸上船人数")
plt.ylabel(u"人数")  


plt.show()



In [266]:

    
#看看各乘客等级的获救情况
fig = plt.figure()
fig.set(alpha=0.2)  # 设定图表颜色alpha参数

Survived_0 = data_train.Pclass[data_train.Survived == 0].value_counts()
Survived_1 = data_train.Pclass[data_train.Survived == 1].value_counts()
df=pd.DataFrame({u'获救':Survived_1, u'未获救':Survived_0})
df.plot(kind='bar', stacked=True)
plt.title(u"各乘客等级的获救情况")
plt.xlabel(u"乘客等级") 
plt.ylabel(u"人数") 

plt.show()









    





<matplotlib.figure.Figure at 0xd43e0b8>



In [267]:

    
#看看各登录港口的获救情况
fig = plt.figure()
fig.set(alpha=0.2)  # 设定图表颜色alpha参数

Survived_0 = data_train.Embarked[data_train.Survived == 0].value_counts()
Survived_1 = data_train.Embarked[data_train.Survived == 1].value_counts()
df=pd.DataFrame({u'获救':Survived_1, u'未获救':Survived_0})
df.plot(kind='bar', stacked=True)
plt.title(u"各登录港口乘客的获救情况")
plt.xlabel(u"登录港口") 
plt.ylabel(u"人数") 

plt.show()









    





<matplotlib.figure.Figure at 0xa8d5e10>



In [268]:

    
#看看各性别的获救情况
fig = plt.figure()
fig.set(alpha=0.2)  # 设定图表颜色alpha参数

Survived_m = data_train.Survived[data_train.Sex == 'male'].value_counts()
Survived_f = data_train.Survived[data_train.Sex == 'female'].value_counts()
df=pd.DataFrame({u'男性':Survived_m, u'女性':Survived_f})
df.plot(kind='bar', stacked=True)
plt.title(u"按性别看获救情况")
plt.xlabel(u"性别") 
plt.ylabel(u"人数")
plt.show()









    





<matplotlib.figure.Figure at 0xd482208>



In [269]:

    
fig=plt.figure()
fig.set(alpha=0.65) # 设置图像透明度，无所谓
plt.title(u"根据舱等级和性别的获救情况")

ax1=fig.add_subplot(141)
data_train.Survived[data_train.Sex == 'female'][data_train.Pclass != 3].value_counts().plot(kind='bar', label="female highclass", color='#FA2479')
ax1.set_xticklabels([u"获救", u"未获救"], rotation=0)
ax1.legend([u"女性/高级舱"], loc='best')

ax2=fig.add_subplot(142, sharey=ax1)
data_train.Survived[data_train.Sex == 'female'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='female, low class', color='pink')
ax2.set_xticklabels([u"未获救", u"获救"], rotation=0)
plt.legend([u"女性/低级舱"], loc='best')

ax3=fig.add_subplot(143, sharey=ax1)
data_train.Survived[data_train.Sex == 'male'][data_train.Pclass != 3].value_counts().plot(kind='bar', label='male, high class',color='lightblue')
ax3.set_xticklabels([u"未获救", u"获救"], rotation=0)
plt.legend([u"男性/高级舱"], loc='best')

ax4=fig.add_subplot(144, sharey=ax1)
data_train.Survived[data_train.Sex == 'male'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='male low class', color='steelblue')
ax4.set_xticklabels([u"未获救", u"获救"], rotation=0)
plt.legend([u"男性/低级舱"], loc='best')

plt.show()



In [270]:

    
g = data_train.groupby(['SibSp','Survived'])
df = pd.DataFrame(g.count()['PassengerId'])
df









    Out[270]:







  
    
      
      
      PassengerId
    
    
      SibSp
      Survived
      
    
  
  
    
      0
      0
      398
    
    
      1
      210
    
    
      1
      0
      97
    
    
      1
      112
    
    
      2
      0
      15
    
    
      1
      13
    
    
      3
      0
      12
    
    
      1
      4
    
    
      4
      0
      15
    
    
      1
      3
    
    
      5
      0
      5
    
    
      8
      0
      7



In [271]:

    
g = data_train.groupby(['Parch','Survived'])
df = pd.DataFrame(g.count()['PassengerId'])
df









    Out[271]:







  
    
      
      
      PassengerId
    
    
      Parch
      Survived
      
    
  
  
    
      0
      0
      445
    
    
      1
      233
    
    
      1
      0
      53
    
    
      1
      65
    
    
      2
      0
      40
    
    
      1
      40
    
    
      3
      0
      2
    
    
      1
      3
    
    
      4
      0
      4
    
    
      5
      0
      4
    
    
      1
      1
    
    
      6
      0
      1

处理缺失值异常值



In [272]:

    
from sklearn.ensemble import RandomForestRegressor
 
### 使用 RandomForestClassifier 填补缺失的年龄属性
def set_missing_ages(df):
    
    # 把已有的数值型特征取出来丢进Random Forest Regressor中
    age_df = df[['Age','Fare', 'Parch', 'SibSp', 'Pclass']]

    # 乘客分成已知年龄和未知年龄两部分
    known_age = age_df[age_df.Age.notnull()].as_matrix()
    unknown_age = age_df[age_df.Age.isnull()].as_matrix()

    # y即目标年龄
    y = known_age[:, 0]

    # X即特征属性值
    X = known_age[:, 1:]

    # fit到RandomForestRegressor之中
    rfr = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    rfr.fit(X, y)
    
    # 用得到的模型进行未知年龄结果预测
    predictedAges = rfr.predict(unknown_age[:, 1::])
    
    # 用得到的预测结果填补原缺失数据
    df.loc[ (df.Age.isnull()), 'Age' ] = predictedAges
    
    return df, rfr

def set_Cabin_type(df):
    df.loc[ (df.Cabin.notnull()), 'Cabin' ] = "Yes"
    df.loc[ (df.Cabin.isnull()), 'Cabin' ] = "No"
    return df

data_train, rfr = set_missing_ages(data_train)
data_train = set_Cabin_type(data_train)
data_train









    Out[272]:







  
    
      
      PassengerId
      Survived
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
    
  
  
    
      0
      1
      0
      3
      Braund, Mr. Owen Harris
      male
      22.000000
      1
      0
      A/5 21171
      7.2500
      No
      S
    
    
      1
      2
      1
      1
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      female
      38.000000
      1
      0
      PC 17599
      71.2833
      Yes
      C
    
    
      2
      3
      1
      3
      Heikkinen, Miss. Laina
      female
      26.000000
      0
      0
      STON/O2. 3101282
      7.9250
      No
      S
    
    
      3
      4
      1
      1
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      female
      35.000000
      1
      0
      113803
      53.1000
      Yes
      S
    
    
      4
      5
      0
      3
      Allen, Mr. William Henry
      male
      35.000000
      0
      0
      373450
      8.0500
      No
      S
    
    
      5
      6
      0
      3
      Moran, Mr. James
      male
      23.828953
      0
      0
      330877
      8.4583
      No
      Q
    
    
      6
      7
      0
      1
      McCarthy, Mr. Timothy J
      male
      54.000000
      0
      0
      17463
      51.8625
      Yes
      S
    
    
      7
      8
      0
      3
      Palsson, Master. Gosta Leonard
      male
      2.000000
      3
      1
      349909
      21.0750
      No
      S
    
    
      8
      9
      1
      3
      Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
      female
      27.000000
      0
      2
      347742
      11.1333
      No
      S
    
    
      9
      10
      1
      2
      Nasser, Mrs. Nicholas (Adele Achem)
      female
      14.000000
      1
      0
      237736
      30.0708
      No
      C
    
    
      10
      11
      1
      3
      Sandstrom, Miss. Marguerite Rut
      female
      4.000000
      1
      1
      PP 9549
      16.7000
      Yes
      S
    
    
      11
      12
      1
      1
      Bonnell, Miss. Elizabeth
      female
      58.000000
      0
      0
      113783
      26.5500
      Yes
      S
    
    
      12
      13
      0
      3
      Saundercock, Mr. William Henry
      male
      20.000000
      0
      0
      A/5. 2151
      8.0500
      No
      S
    
    
      13
      14
      0
      3
      Andersson, Mr. Anders Johan
      male
      39.000000
      1
      5
      347082
      31.2750
      No
      S
    
    
      14
      15
      0
      3
      Vestrom, Miss. Hulda Amanda Adolfina
      female
      14.000000
      0
      0
      350406
      7.8542
      No
      S
    
    
      15
      16
      1
      2
      Hewlett, Mrs. (Mary D Kingcome)
      female
      55.000000
      0
      0
      248706
      16.0000
      No
      S
    
    
      16
      17
      0
      3
      Rice, Master. Eugene
      male
      2.000000
      4
      1
      382652
      29.1250
      No
      Q
    
    
      17
      18
      1
      2
      Williams, Mr. Charles Eugene
      male
      32.066493
      0
      0
      244373
      13.0000
      No
      S
    
    
      18
      19
      0
      3
      Vander Planke, Mrs. Julius (Emelia Maria Vande...
      female
      31.000000
      1
      0
      345763
      18.0000
      No
      S
    
    
      19
      20
      1
      3
      Masselmani, Mrs. Fatima
      female
      29.518205
      0
      0
      2649
      7.2250
      No
      C
    
    
      20
      21
      0
      2
      Fynney, Mr. Joseph J
      male
      35.000000
      0
      0
      239865
      26.0000
      No
      S
    
    
      21
      22
      1
      2
      Beesley, Mr. Lawrence
      male
      34.000000
      0
      0
      248698
      13.0000
      Yes
      S
    
    
      22
      23
      1
      3
      McGowan, Miss. Anna "Annie"
      female
      15.000000
      0
      0
      330923
      8.0292
      No
      Q
    
    
      23
      24
      1
      1
      Sloper, Mr. William Thompson
      male
      28.000000
      0
      0
      113788
      35.5000
      Yes
      S
    
    
      24
      25
      0
      3
      Palsson, Miss. Torborg Danira
      female
      8.000000
      3
      1
      349909
      21.0750
      No
      S
    
    
      25
      26
      1
      3
      Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
      female
      38.000000
      1
      5
      347077
      31.3875
      No
      S
    
    
      26
      27
      0
      3
      Emir, Mr. Farred Chehab
      male
      29.518205
      0
      0
      2631
      7.2250
      No
      C
    
    
      27
      28
      0
      1
      Fortune, Mr. Charles Alexander
      male
      19.000000
      3
      2
      19950
      263.0000
      Yes
      S
    
    
      28
      29
      1
      3
      O'Dwyer, Miss. Ellen "Nellie"
      female
      22.380113
      0
      0
      330959
      7.8792
      No
      Q
    
    
      29
      30
      0
      3
      Todoroff, Mr. Lalio
      male
      27.947206
      0
      0
      349216
      7.8958
      No
      S
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      861
      862
      0
      2
      Giles, Mr. Frederick Edward
      male
      21.000000
      1
      0
      28134
      11.5000
      No
      S
    
    
      862
      863
      1
      1
      Swift, Mrs. Frederick Joel (Margaret Welles Ba...
      female
      48.000000
      0
      0
      17466
      25.9292
      Yes
      S
    
    
      863
      864
      0
      3
      Sage, Miss. Dorothy Edith "Dolly"
      female
      10.869867
      8
      2
      CA. 2343
      69.5500
      No
      S
    
    
      864
      865
      0
      2
      Gill, Mr. John William
      male
      24.000000
      0
      0
      233866
      13.0000
      No
      S
    
    
      865
      866
      1
      2
      Bystrom, Mrs. (Karolina)
      female
      42.000000
      0
      0
      236852
      13.0000
      No
      S
    
    
      866
      867
      1
      2
      Duran y More, Miss. Asuncion
      female
      27.000000
      1
      0
      SC/PARIS 2149
      13.8583
      No
      C
    
    
      867
      868
      0
      1
      Roebling, Mr. Washington Augustus II
      male
      31.000000
      0
      0
      PC 17590
      50.4958
      Yes
      S
    
    
      868
      869
      0
      3
      van Melkebeke, Mr. Philemon
      male
      25.977889
      0
      0
      345777
      9.5000
      No
      S
    
    
      869
      870
      1
      3
      Johnson, Master. Harold Theodor
      male
      4.000000
      1
      1
      347742
      11.1333
      No
      S
    
    
      870
      871
      0
      3
      Balkic, Mr. Cerin
      male
      26.000000
      0
      0
      349248
      7.8958
      No
      S
    
    
      871
      872
      1
      1
      Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
      female
      47.000000
      1
      1
      11751
      52.5542
      Yes
      S
    
    
      872
      873
      0
      1
      Carlsson, Mr. Frans Olof
      male
      33.000000
      0
      0
      695
      5.0000
      Yes
      S
    
    
      873
      874
      0
      3
      Vander Cruyssen, Mr. Victor
      male
      47.000000
      0
      0
      345765
      9.0000
      No
      S
    
    
      874
      875
      1
      2
      Abelson, Mrs. Samuel (Hannah Wizosky)
      female
      28.000000
      1
      0
      P/PP 3381
      24.0000
      No
      C
    
    
      875
      876
      1
      3
      Najib, Miss. Adele Kiamie "Jane"
      female
      15.000000
      0
      0
      2667
      7.2250
      No
      C
    
    
      876
      877
      0
      3
      Gustafsson, Mr. Alfred Ossian
      male
      20.000000
      0
      0
      7534
      9.8458
      No
      S
    
    
      877
      878
      0
      3
      Petroff, Mr. Nedelio
      male
      19.000000
      0
      0
      349212
      7.8958
      No
      S
    
    
      878
      879
      0
      3
      Laleff, Mr. Kristo
      male
      27.947206
      0
      0
      349217
      7.8958
      No
      S
    
    
      879
      880
      1
      1
      Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
      female
      56.000000
      0
      1
      11767
      83.1583
      Yes
      C
    
    
      880
      881
      1
      2
      Shelley, Mrs. William (Imanita Parrish Hall)
      female
      25.000000
      0
      1
      230433
      26.0000
      No
      S
    
    
      881
      882
      0
      3
      Markun, Mr. Johann
      male
      33.000000
      0
      0
      349257
      7.8958
      No
      S
    
    
      882
      883
      0
      3
      Dahlberg, Miss. Gerda Ulrika
      female
      22.000000
      0
      0
      7552
      10.5167
      No
      S
    
    
      883
      884
      0
      2
      Banfield, Mr. Frederick James
      male
      28.000000
      0
      0
      C.A./SOTON 34068
      10.5000
      No
      S
    
    
      884
      885
      0
      3
      Sutehall, Mr. Henry Jr
      male
      25.000000
      0
      0
      SOTON/OQ 392076
      7.0500
      No
      S
    
    
      885
      886
      0
      3
      Rice, Mrs. William (Margaret Norton)
      female
      39.000000
      0
      5
      382652
      29.1250
      No
      Q
    
    
      886
      887
      0
      2
      Montvila, Rev. Juozas
      male
      27.000000
      0
      0
      211536
      13.0000
      No
      S
    
    
      887
      888
      1
      1
      Graham, Miss. Margaret Edith
      female
      19.000000
      0
      0
      112053
      30.0000
      Yes
      S
    
    
      888
      889
      0
      3
      Johnston, Miss. Catherine Helen "Carrie"
      female
      16.127950
      1
      2
      W./C. 6607
      23.4500
      No
      S
    
    
      889
      890
      1
      1
      Behr, Mr. Karl Howell
      male
      26.000000
      0
      0
      111369
      30.0000
      Yes
      C
    
    
      890
      891
      0
      3
      Dooley, Mr. Patrick
      male
      32.000000
      0
      0
      370376
      7.7500
      No
      Q
    
  

891 rows × 12 columns

onehot编码



In [273]:

    
# 因为逻辑回归建模时，需要输入的特征都是数值型特征
# 我们先对类目型的特征离散/因子化
# 以Cabin为例，原本一个属性维度，因为其取值可以是['yes','no']，而将其平展开为'Cabin_yes','Cabin_no'两个属性
# 原本Cabin取值为yes的，在此处的'Cabin_yes'下取值为1，在'Cabin_no'下取值为0
# 原本Cabin取值为no的，在此处的'Cabin_yes'下取值为0，在'Cabin_no'下取值为1
# 我们使用pandas的get_dummies来完成这个工作，并拼接在原来的data_train之上，如下所示
dummies_Cabin = pd.get_dummies(data_train['Cabin'], prefix= 'Cabin')

dummies_Embarked = pd.get_dummies(data_train['Embarked'], prefix= 'Embarked')

dummies_Sex = pd.get_dummies(data_train['Sex'], prefix= 'Sex')

dummies_Pclass = pd.get_dummies(data_train['Pclass'], prefix= 'Pclass')

df = pd.concat([data_train, dummies_Cabin, dummies_Embarked, dummies_Sex, dummies_Pclass], axis=1)
df.drop(['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)
df









    Out[273]:







  
    
      
      PassengerId
      Survived
      Age
      SibSp
      Parch
      Fare
      Cabin_No
      Cabin_Yes
      Embarked_C
      Embarked_Q
      Embarked_S
      Sex_female
      Sex_male
      Pclass_1
      Pclass_2
      Pclass_3
    
  
  
    
      0
      1
      0
      22.000000
      1
      0
      7.2500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      1
      2
      1
      38.000000
      1
      0
      71.2833
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
    
    
      2
      3
      1
      26.000000
      0
      0
      7.9250
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      3
      4
      1
      35.000000
      1
      0
      53.1000
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
    
    
      4
      5
      0
      35.000000
      0
      0
      8.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      5
      6
      0
      23.828953
      0
      0
      8.4583
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
    
    
      6
      7
      0
      54.000000
      0
      0
      51.8625
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
    
    
      7
      8
      0
      2.000000
      3
      1
      21.0750
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      8
      9
      1
      27.000000
      0
      2
      11.1333
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      9
      10
      1
      14.000000
      1
      0
      30.0708
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
    
    
      10
      11
      1
      4.000000
      1
      1
      16.7000
      0
      1
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      11
      12
      1
      58.000000
      0
      0
      26.5500
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
    
    
      12
      13
      0
      20.000000
      0
      0
      8.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      13
      14
      0
      39.000000
      1
      5
      31.2750
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      14
      15
      0
      14.000000
      0
      0
      7.8542
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      15
      16
      1
      55.000000
      0
      0
      16.0000
      1
      0
      0
      0
      1
      1
      0
      0
      1
      0
    
    
      16
      17
      0
      2.000000
      4
      1
      29.1250
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
    
    
      17
      18
      1
      32.066493
      0
      0
      13.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      18
      19
      0
      31.000000
      1
      0
      18.0000
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      19
      20
      1
      29.518205
      0
      0
      7.2250
      1
      0
      1
      0
      0
      1
      0
      0
      0
      1
    
    
      20
      21
      0
      35.000000
      0
      0
      26.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      21
      22
      1
      34.000000
      0
      0
      13.0000
      0
      1
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      22
      23
      1
      15.000000
      0
      0
      8.0292
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
    
    
      23
      24
      1
      28.000000
      0
      0
      35.5000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
    
    
      24
      25
      0
      8.000000
      3
      1
      21.0750
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      25
      26
      1
      38.000000
      1
      5
      31.3875
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      26
      27
      0
      29.518205
      0
      0
      7.2250
      1
      0
      1
      0
      0
      0
      1
      0
      0
      1
    
    
      27
      28
      0
      19.000000
      3
      2
      263.0000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
    
    
      28
      29
      1
      22.380113
      0
      0
      7.8792
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
    
    
      29
      30
      0
      27.947206
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      861
      862
      0
      21.000000
      1
      0
      11.5000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      862
      863
      1
      48.000000
      0
      0
      25.9292
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
    
    
      863
      864
      0
      10.869867
      8
      2
      69.5500
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      864
      865
      0
      24.000000
      0
      0
      13.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      865
      866
      1
      42.000000
      0
      0
      13.0000
      1
      0
      0
      0
      1
      1
      0
      0
      1
      0
    
    
      866
      867
      1
      27.000000
      1
      0
      13.8583
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
    
    
      867
      868
      0
      31.000000
      0
      0
      50.4958
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
    
    
      868
      869
      0
      25.977889
      0
      0
      9.5000
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      869
      870
      1
      4.000000
      1
      1
      11.1333
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      870
      871
      0
      26.000000
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      871
      872
      1
      47.000000
      1
      1
      52.5542
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
    
    
      872
      873
      0
      33.000000
      0
      0
      5.0000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
    
    
      873
      874
      0
      47.000000
      0
      0
      9.0000
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      874
      875
      1
      28.000000
      1
      0
      24.0000
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
    
    
      875
      876
      1
      15.000000
      0
      0
      7.2250
      1
      0
      1
      0
      0
      1
      0
      0
      0
      1
    
    
      876
      877
      0
      20.000000
      0
      0
      9.8458
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      877
      878
      0
      19.000000
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      878
      879
      0
      27.947206
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      879
      880
      1
      56.000000
      0
      1
      83.1583
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
    
    
      880
      881
      1
      25.000000
      0
      1
      26.0000
      1
      0
      0
      0
      1
      1
      0
      0
      1
      0
    
    
      881
      882
      0
      33.000000
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      882
      883
      0
      22.000000
      0
      0
      10.5167
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      883
      884
      0
      28.000000
      0
      0
      10.5000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      884
      885
      0
      25.000000
      0
      0
      7.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
    
    
      885
      886
      0
      39.000000
      0
      5
      29.1250
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
    
    
      886
      887
      0
      27.000000
      0
      0
      13.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
    
    
      887
      888
      1
      19.000000
      0
      0
      30.0000
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
    
    
      888
      889
      0
      16.127950
      1
      2
      23.4500
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
    
    
      889
      890
      1
      26.000000
      0
      0
      30.0000
      0
      1
      1
      0
      0
      0
      1
      1
      0
      0
    
    
      890
      891
      0
      32.000000
      0
      0
      7.7500
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
    
  

891 rows × 16 columns



In [274]:

    
# 接下来我们要接着做一些数据预处理的工作，比如scaling，将一些变化幅度较大的特征化到[-1,1]之内
# 这样可以加速logistic regression的收敛
import sklearn.preprocessing as preprocessing
scaler = preprocessing.StandardScaler()
age_scale_param = scaler.fit(df['Age'])
df['Age_scaled'] = scaler.fit_transform(df['Age'], age_scale_param)
fare_scale_param = scaler.fit(df['Fare'])
df['Fare_scaled'] = scaler.fit_transform(df['Fare'], fare_scale_param)
df









    



D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)






    Out[274]:







  
    
      
      PassengerId
      Survived
      Age
      SibSp
      Parch
      Fare
      Cabin_No
      Cabin_Yes
      Embarked_C
      Embarked_Q
      Embarked_S
      Sex_female
      Sex_male
      Pclass_1
      Pclass_2
      Pclass_3
      Age_scaled
      Fare_scaled
    
  
  
    
      0
      1
      0
      22.000000
      1
      0
      7.2500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.561363
      -0.502445
    
    
      1
      2
      1
      38.000000
      1
      0
      71.2833
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
      0.613182
      0.786845
    
    
      2
      3
      1
      26.000000
      0
      0
      7.9250
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.267727
      -0.488854
    
    
      3
      4
      1
      35.000000
      1
      0
      53.1000
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      0.392955
      0.420730
    
    
      4
      5
      0
      35.000000
      0
      0
      8.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      0.392955
      -0.486337
    
    
      5
      6
      0
      23.828953
      0
      0
      8.4583
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      -0.427102
      -0.478116
    
    
      6
      7
      0
      54.000000
      0
      0
      51.8625
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      1.787727
      0.395814
    
    
      7
      8
      0
      2.000000
      3
      1
      21.0750
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -2.029545
      -0.224083
    
    
      8
      9
      1
      27.000000
      0
      2
      11.1333
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.194318
      -0.424256
    
    
      9
      10
      1
      14.000000
      1
      0
      30.0708
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
      -1.148636
      -0.042956
    
    
      10
      11
      1
      4.000000
      1
      1
      16.7000
      0
      1
      0
      0
      1
      1
      0
      0
      0
      1
      -1.882726
      -0.312172
    
    
      11
      12
      1
      58.000000
      0
      0
      26.5500
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      2.081363
      -0.113846
    
    
      12
      13
      0
      20.000000
      0
      0
      8.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.708181
      -0.486337
    
    
      13
      14
      0
      39.000000
      1
      5
      31.2750
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      0.686591
      -0.018709
    
    
      14
      15
      0
      14.000000
      0
      0
      7.8542
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -1.148636
      -0.490280
    
    
      15
      16
      1
      55.000000
      0
      0
      16.0000
      1
      0
      0
      0
      1
      1
      0
      0
      1
      0
      1.861136
      -0.326267
    
    
      16
      17
      0
      2.000000
      4
      1
      29.1250
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      -2.029545
      -0.061999
    
    
      17
      18
      1
      32.066493
      0
      0
      13.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      0.177609
      -0.386671
    
    
      18
      19
      0
      31.000000
      1
      0
      18.0000
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      0.099318
      -0.285997
    
    
      19
      20
      1
      29.518205
      0
      0
      7.2250
      1
      0
      1
      0
      0
      1
      0
      0
      0
      1
      -0.009459
      -0.502949
    
    
      20
      21
      0
      35.000000
      0
      0
      26.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      0.392955
      -0.124920
    
    
      21
      22
      1
      34.000000
      0
      0
      13.0000
      0
      1
      0
      0
      1
      0
      1
      0
      1
      0
      0.319546
      -0.386671
    
    
      22
      23
      1
      15.000000
      0
      0
      8.0292
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
      -1.075227
      -0.486756
    
    
      23
      24
      1
      28.000000
      0
      0
      35.5000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      -0.120909
      0.066360
    
    
      24
      25
      0
      8.000000
      3
      1
      21.0750
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -1.589090
      -0.224083
    
    
      25
      26
      1
      38.000000
      1
      5
      31.3875
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      0.613182
      -0.016444
    
    
      26
      27
      0
      29.518205
      0
      0
      7.2250
      1
      0
      1
      0
      0
      0
      1
      0
      0
      1
      -0.009459
      -0.502949
    
    
      27
      28
      0
      19.000000
      3
      2
      263.0000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      -0.781590
      4.647001
    
    
      28
      29
      1
      22.380113
      0
      0
      7.8792
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
      -0.533459
      -0.489776
    
    
      29
      30
      0
      27.947206
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.124784
      -0.489442
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      861
      862
      0
      21.000000
      1
      0
      11.5000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      -0.634772
      -0.416873
    
    
      862
      863
      1
      48.000000
      0
      0
      25.9292
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      1.347272
      -0.126345
    
    
      863
      864
      0
      10.869867
      8
      2
      69.5500
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -1.378416
      0.751946
    
    
      864
      865
      0
      24.000000
      0
      0
      13.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      -0.414545
      -0.386671
    
    
      865
      866
      1
      42.000000
      0
      0
      13.0000
      1
      0
      0
      0
      1
      1
      0
      0
      1
      0
      0.906818
      -0.386671
    
    
      866
      867
      1
      27.000000
      1
      0
      13.8583
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
      -0.194318
      -0.369389
    
    
      867
      868
      0
      31.000000
      0
      0
      50.4958
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0.099318
      0.368295
    
    
      868
      869
      0
      25.977889
      0
      0
      9.5000
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.269350
      -0.457142
    
    
      869
      870
      1
      4.000000
      1
      1
      11.1333
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -1.882726
      -0.424256
    
    
      870
      871
      0
      26.000000
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.267727
      -0.489442
    
    
      871
      872
      1
      47.000000
      1
      1
      52.5542
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      1.273863
      0.409741
    
    
      872
      873
      0
      33.000000
      0
      0
      5.0000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0.246136
      -0.547748
    
    
      873
      874
      0
      47.000000
      0
      0
      9.0000
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      1.273863
      -0.467209
    
    
      874
      875
      1
      28.000000
      1
      0
      24.0000
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
      -0.120909
      -0.165189
    
    
      875
      876
      1
      15.000000
      0
      0
      7.2250
      1
      0
      1
      0
      0
      1
      0
      0
      0
      1
      -1.075227
      -0.502949
    
    
      876
      877
      0
      20.000000
      0
      0
      9.8458
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.708181
      -0.450180
    
    
      877
      878
      0
      19.000000
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.781590
      -0.489442
    
    
      878
      879
      0
      27.947206
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.124784
      -0.489442
    
    
      879
      880
      1
      56.000000
      0
      1
      83.1583
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
      1.934545
      1.025945
    
    
      880
      881
      1
      25.000000
      0
      1
      26.0000
      1
      0
      0
      0
      1
      1
      0
      0
      1
      0
      -0.341136
      -0.124920
    
    
      881
      882
      0
      33.000000
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      0.246136
      -0.489442
    
    
      882
      883
      0
      22.000000
      0
      0
      10.5167
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.561363
      -0.436671
    
    
      883
      884
      0
      28.000000
      0
      0
      10.5000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      -0.120909
      -0.437007
    
    
      884
      885
      0
      25.000000
      0
      0
      7.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.341136
      -0.506472
    
    
      885
      886
      0
      39.000000
      0
      5
      29.1250
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
      0.686591
      -0.061999
    
    
      886
      887
      0
      27.000000
      0
      0
      13.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      -0.194318
      -0.386671
    
    
      887
      888
      1
      19.000000
      0
      0
      30.0000
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      -0.781590
      -0.044381
    
    
      888
      889
      0
      16.127950
      1
      2
      23.4500
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.992425
      -0.176263
    
    
      889
      890
      1
      26.000000
      0
      0
      30.0000
      0
      1
      1
      0
      0
      0
      1
      1
      0
      0
      -0.267727
      -0.044381
    
    
      890
      891
      0
      32.000000
      0
      0
      7.7500
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      0.172727
      -0.492378
    
  

891 rows × 18 columns

建模



In [275]:

    
# 我们把需要的feature字段取出来，转成numpy格式，使用scikit-learn中的LogisticRegression建模
from sklearn import linear_model

train_df = df.filter(regex='Survived|Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
train_np = train_df.as_matrix()

# y即Survival结果
y = train_np[:, 0]

# X即特征属性值
X = train_np[:, 1:]

# fit到RandomForestRegressor之中
clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
clf.fit(X, y)
    
clf









    Out[275]:





LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=1e-06,
          verbose=0, warm_start=False)



In [276]:

    
data_test = pd.read_csv("./input/titanic/test.csv")
data_test.loc[ (data_test.Fare.isnull()), 'Fare' ] = 0
# 接着我们对test_data做和train_data中一致的特征变换
# 首先用同样的RandomForestRegressor模型填上丢失的年龄
tmp_df = data_test[['Age','Fare', 'Parch', 'SibSp', 'Pclass']]
null_age = tmp_df[data_test.Age.isnull()].as_matrix()
# 根据特征属性X预测年龄并补上
X = null_age[:, 1:]
predictedAges = rfr.predict(X)
data_test.loc[ (data_test.Age.isnull()), 'Age' ] = predictedAges

data_test = set_Cabin_type(data_test)
dummies_Cabin = pd.get_dummies(data_test['Cabin'], prefix= 'Cabin')
dummies_Embarked = pd.get_dummies(data_test['Embarked'], prefix= 'Embarked')
dummies_Sex = pd.get_dummies(data_test['Sex'], prefix= 'Sex')
dummies_Pclass = pd.get_dummies(data_test['Pclass'], prefix= 'Pclass')


df_test = pd.concat([data_test, dummies_Cabin, dummies_Embarked, dummies_Sex, dummies_Pclass], axis=1)
df_test.drop(['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1, inplace=True)
df_test['Age_scaled'] = scaler.fit_transform(df_test['Age'], age_scale_param)
df_test['Fare_scaled'] = scaler.fit_transform(df_test['Fare'], fare_scale_param)
df_test









    



D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:586: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)
D:\soft\anaconda3\lib\site-packages\sklearn\preprocessing\data.py:649: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)






    Out[276]:







  
    
      
      PassengerId
      Age
      SibSp
      Parch
      Fare
      Cabin_No
      Cabin_Yes
      Embarked_C
      Embarked_Q
      Embarked_S
      Sex_female
      Sex_male
      Pclass_1
      Pclass_2
      Pclass_3
      Age_scaled
      Fare_scaled
    
  
  
    
      0
      892
      34.500000
      0
      0
      7.8292
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      0.307535
      -0.496637
    
    
      1
      893
      47.000000
      1
      0
      7.0000
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      1.256230
      -0.511497
    
    
      2
      894
      62.000000
      0
      0
      9.6875
      1
      0
      0
      1
      0
      0
      1
      0
      1
      0
      2.394665
      -0.463335
    
    
      3
      895
      27.000000
      0
      0
      8.6625
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.261683
      -0.481704
    
    
      4
      896
      22.000000
      1
      1
      12.2875
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.641161
      -0.416740
    
    
      5
      897
      14.000000
      0
      0
      9.2250
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -1.248326
      -0.471623
    
    
      6
      898
      30.000000
      0
      0
      7.6292
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
      -0.033996
      -0.500221
    
    
      7
      899
      26.000000
      1
      1
      29.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      -0.337578
      -0.117238
    
    
      8
      900
      18.000000
      0
      0
      7.2292
      1
      0
      1
      0
      0
      1
      0
      0
      0
      1
      -0.944743
      -0.507390
    
    
      9
      901
      21.000000
      2
      0
      24.1500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.717056
      -0.204154
    
    
      10
      902
      27.947206
      0
      0
      7.8958
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.189794
      -0.495444
    
    
      11
      903
      46.000000
      0
      0
      26.0000
      1
      0
      0
      0
      1
      0
      1
      1
      0
      0
      1.180334
      -0.171000
    
    
      12
      904
      23.000000
      1
      0
      82.2667
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      -0.565265
      0.837349
    
    
      13
      905
      63.000000
      1
      0
      26.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      2.470560
      -0.171000
    
    
      14
      906
      47.000000
      1
      0
      61.1750
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      1.256230
      0.459367
    
    
      15
      907
      24.000000
      1
      0
      27.7208
      1
      0
      1
      0
      0
      1
      0
      0
      1
      0
      -0.489370
      -0.140162
    
    
      16
      908
      35.000000
      0
      0
      12.3500
      1
      0
      0
      1
      0
      0
      1
      0
      1
      0
      0.345482
      -0.415620
    
    
      17
      909
      21.000000
      0
      0
      7.2250
      1
      0
      1
      0
      0
      0
      1
      0
      0
      1
      -0.717056
      -0.507465
    
    
      18
      910
      27.000000
      1
      0
      7.9250
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.261683
      -0.494920
    
    
      19
      911
      45.000000
      0
      0
      7.2250
      1
      0
      1
      0
      0
      1
      0
      0
      0
      1
      1.104439
      -0.507465
    
    
      20
      912
      55.000000
      1
      0
      59.4000
      1
      0
      1
      0
      0
      0
      1
      1
      0
      0
      1.863395
      0.427557
    
    
      21
      913
      9.000000
      0
      1
      3.1708
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -1.627804
      -0.580120
    
    
      22
      914
      52.314311
      0
      0
      31.6833
      1
      0
      0
      0
      1
      1
      0
      1
      0
      0
      1.659563
      -0.069151
    
    
      23
      915
      21.000000
      0
      1
      61.3792
      1
      0
      1
      0
      0
      0
      1
      1
      0
      0
      -0.717056
      0.463026
    
    
      24
      916
      48.000000
      1
      3
      262.3750
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
      1.332126
      4.065049
    
    
      25
      917
      50.000000
      1
      0
      14.5000
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      1.483917
      -0.377090
    
    
      26
      918
      22.000000
      0
      1
      61.9792
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
      -0.641161
      0.473779
    
    
      27
      919
      22.500000
      0
      0
      7.2250
      1
      0
      1
      0
      0
      0
      1
      0
      0
      1
      -0.603213
      -0.507465
    
    
      28
      920
      41.000000
      0
      0
      30.5000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      0.800856
      -0.090356
    
    
      29
      921
      23.458621
      2
      0
      21.6792
      1
      0
      1
      0
      0
      0
      1
      0
      0
      1
      -0.530458
      -0.248433
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      388
      1280
      21.000000
      0
      0
      7.7500
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      -0.717056
      -0.498056
    
    
      389
      1281
      6.000000
      3
      1
      21.0750
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -1.855491
      -0.259261
    
    
      390
      1282
      23.000000
      0
      0
      93.5000
      0
      1
      0
      0
      1
      0
      1
      1
      0
      0
      -0.565265
      1.038659
    
    
      391
      1283
      51.000000
      0
      1
      39.4000
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      1.559813
      0.069140
    
    
      392
      1284
      13.000000
      0
      2
      20.2500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -1.324222
      -0.274045
    
    
      393
      1285
      47.000000
      0
      0
      10.5000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      1.256230
      -0.448774
    
    
      394
      1286
      29.000000
      3
      1
      22.0250
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.109891
      -0.242236
    
    
      395
      1287
      18.000000
      1
      0
      60.0000
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      -0.944743
      0.438310
    
    
      396
      1288
      24.000000
      0
      0
      7.2500
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      -0.489370
      -0.507017
    
    
      397
      1289
      48.000000
      1
      1
      79.2000
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
      1.332126
      0.782391
    
    
      398
      1290
      22.000000
      0
      0
      7.7750
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      -0.641161
      -0.497608
    
    
      399
      1291
      31.000000
      0
      0
      7.7333
      1
      0
      0
      1
      0
      0
      1
      0
      0
      1
      0.041900
      -0.498356
    
    
      400
      1292
      30.000000
      0
      0
      164.8667
      0
      1
      0
      0
      1
      1
      0
      1
      0
      0
      -0.033996
      2.317614
    
    
      401
      1293
      38.000000
      1
      0
      21.0000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      0.573169
      -0.260605
    
    
      402
      1294
      22.000000
      0
      1
      59.4000
      1
      0
      1
      0
      0
      1
      0
      1
      0
      0
      -0.641161
      0.427557
    
    
      403
      1295
      17.000000
      0
      0
      47.1000
      1
      0
      0
      0
      1
      0
      1
      1
      0
      0
      -1.020639
      0.207130
    
    
      404
      1296
      43.000000
      1
      0
      27.7208
      0
      1
      1
      0
      0
      0
      1
      1
      0
      0
      0.952648
      -0.140162
    
    
      405
      1297
      20.000000
      0
      0
      13.8625
      0
      1
      1
      0
      0
      0
      1
      0
      1
      0
      -0.792952
      -0.388515
    
    
      406
      1298
      23.000000
      1
      0
      10.5000
      1
      0
      0
      0
      1
      0
      1
      0
      1
      0
      -0.565265
      -0.448774
    
    
      407
      1299
      50.000000
      1
      1
      211.5000
      0
      1
      1
      0
      0
      0
      1
      1
      0
      0
      1.483917
      3.153324
    
    
      408
      1300
      19.895581
      0
      0
      7.7208
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
      -0.800877
      -0.498580
    
    
      409
      1301
      3.000000
      1
      1
      13.7750
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -2.083178
      -0.390083
    
    
      410
      1302
      35.295824
      0
      0
      7.7500
      1
      0
      0
      1
      0
      1
      0
      0
      0
      1
      0.367934
      -0.498056
    
    
      411
      1303
      37.000000
      1
      0
      90.0000
      0
      1
      0
      1
      0
      1
      0
      1
      0
      0
      0.497274
      0.975936
    
    
      412
      1304
      28.000000
      0
      0
      7.7750
      1
      0
      0
      0
      1
      1
      0
      0
      0
      1
      -0.185787
      -0.497608
    
    
      413
      1305
      30.705727
      0
      0
      8.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      0.019566
      -0.492680
    
    
      414
      1306
      39.000000
      0
      0
      108.9000
      0
      1
      1
      0
      0
      1
      0
      1
      0
      0
      0.649065
      1.314641
    
    
      415
      1307
      38.500000
      0
      0
      7.2500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      0.611117
      -0.507017
    
    
      416
      1308
      30.705727
      0
      0
      8.0500
      1
      0
      0
      0
      1
      0
      1
      0
      0
      1
      0.019566
      -0.492680
    
    
      417
      1309
      25.793502
      1
      1
      22.3583
      1
      0
      1
      0
      0
      0
      1
      0
      0
      1
      -0.353251
      -0.236263
    
  

418 rows × 17 columns



In [277]:

    
test = df_test.filter(regex='Age_.*|SibSp|Parch|Fare_.*|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
predictions = clf.predict(test)
result = pd.DataFrame({'PassengerId':data_test['PassengerId'].as_matrix(), 'Survived':predictions.astype(np.int32)})
result.to_csv("logistic_regression_predictions.csv", index=False)



In [278]:

    
# BaseLine 系统构建完成

学习曲线



In [279]:

    
import numpy as np
import matplotlib.pyplot as plt
from sklearn.learning_curve import learning_curve

# 用sklearn的learning_curve得到training_score和cv_score，使用matplotlib画出learning curve
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, 
                        train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True):
    """
    画出data在某模型上的learning curve.
    参数解释
    ----------
    estimator : 你用的分类器。
    title : 表格的标题。
    X : 输入的feature，numpy类型
    y : 输入的target vector
    ylim : tuple格式的(ymin, ymax), 设定图像中纵坐标的最低点和最高点
    cv : 做cross-validation的时候，数据分成的份数，其中一份作为cv集，其余n-1份作为training(默认为3份)
    n_jobs : 并行的的任务数(默认1)
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, verbose=verbose)
    
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    
    if plot:
        plt.figure()
        plt.title(title)
        if ylim is not None:
            plt.ylim(*ylim)
        plt.xlabel(u"训练样本数")
        plt.ylabel(u"得分")
        plt.gca().invert_yaxis()
        plt.grid()
    
        plt.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, 
                         alpha=0.1, color="b")
        plt.fill_between(train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, 
                         alpha=0.1, color="r")
        plt.plot(train_sizes, train_scores_mean, 'o-', color="b", label=u"训练集上得分")
        plt.plot(train_sizes, test_scores_mean, 'o-', color="r", label=u"交叉验证集上得分")
    
        plt.legend(loc="best")
        
        plt.draw()
        plt.gca().invert_yaxis()
        plt.show()
    
    midpoint = ((train_scores_mean[-1] + train_scores_std[-1]) + (test_scores_mean[-1] - test_scores_std[-1])) / 2
    diff = (train_scores_mean[-1] + train_scores_std[-1]) - (test_scores_mean[-1] - test_scores_std[-1])
    return midpoint, diff



In [280]:

    
pd.DataFrame({"columns":list(train_df.columns)[1:], "coef":list(clf.coef_.T)})









    Out[280]:







  
    
      
      coef
      columns
    
  
  
    
      0
      [-0.34422839921]
      SibSp
    
    
      1
      [-0.104931034051]
      Parch
    
    
      2
      [0.0]
      Cabin_No
    
    
      3
      [0.902141342399]
      Cabin_Yes
    
    
      4
      [0.0]
      Embarked_C
    
    
      5
      [0.0]
      Embarked_Q
    
    
      6
      [-0.417261046864]
      Embarked_S
    
    
      7
      [1.95657542024]
      Sex_female
    
    
      8
      [-0.677420626893]
      Sex_male
    
    
      9
      [0.341143936557]
      Pclass_1
    
    
      10
      [0.0]
      Pclass_2
    
    
      11
      [-1.1941395756]
      Pclass_3
    
    
      12
      [-0.523782091128]
      Age_scaled
    
    
      13
      [0.0844324435407]
      Fare_scaled

Cross validation CV



In [ ]:

    
from sklearn

	PassengerId	Survived	Pclass	Age	SibSp	Parch	Fare
count	891.000000	891.000000	891.000000	714.000000	891.000000	891.000000	891.000000
mean	446.000000	0.383838	2.308642	29.699118	0.523008	0.381594	32.204208
std	257.353842	0.486592	0.836071	14.526497	1.102743	0.806057	49.693429
min	1.000000	0.000000	1.000000	0.420000	0.000000	0.000000	0.000000
25%	223.500000	0.000000	2.000000	20.125000	0.000000	0.000000	7.910400
50%	446.000000	0.000000	3.000000	28.000000	0.000000	0.000000	14.454200
75%	668.500000	1.000000	3.000000	38.000000	1.000000	0.000000	31.000000
max	891.000000	1.000000	3.000000	80.000000	8.000000	6.000000	512.329200

		PassengerId
SibSp	Survived
0	0	398
0	1	210
1	0	97
1	1	112
2	0	15
2	1	13
3	0	12
3	1	4
4	0	15
4	1	3
5	0	5
8	0	7

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.000000	1	0	A/5 21171	7.2500	No	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.000000	1	0	PC 17599	71.2833	Yes	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.000000	0	0	STON/O2. 3101282	7.9250	No	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.000000	1	0	113803	53.1000	Yes	S
4	5	0	3	Allen, Mr. William Henry	male	35.000000	0	0	373450	8.0500	No	S
5	6	0	3	Moran, Mr. James	male	23.828953	0	0	330877	8.4583	No	Q
6	7	0	1	McCarthy, Mr. Timothy J	male	54.000000	0	0	17463	51.8625	Yes	S
7	8	0	3	Palsson, Master. Gosta Leonard	male	2.000000	3	1	349909	21.0750	No	S
8	9	1	3	Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)	female	27.000000	0	2	347742	11.1333	No	S
9	10	1	2	Nasser, Mrs. Nicholas (Adele Achem)	female	14.000000	1	0	237736	30.0708	No	C
10	11	1	3	Sandstrom, Miss. Marguerite Rut	female	4.000000	1	1	PP 9549	16.7000	Yes	S
11	12	1	1	Bonnell, Miss. Elizabeth	female	58.000000	0	0	113783	26.5500	Yes	S
12	13	0	3	Saundercock, Mr. William Henry	male	20.000000	0	0	A/5. 2151	8.0500	No	S
13	14	0	3	Andersson, Mr. Anders Johan	male	39.000000	1	5	347082	31.2750	No	S
14	15	0	3	Vestrom, Miss. Hulda Amanda Adolfina	female	14.000000	0	0	350406	7.8542	No	S
15	16	1	2	Hewlett, Mrs. (Mary D Kingcome)	female	55.000000	0	0	248706	16.0000	No	S
16	17	0	3	Rice, Master. Eugene	male	2.000000	4	1	382652	29.1250	No	Q
17	18	1	2	Williams, Mr. Charles Eugene	male	32.066493	0	0	244373	13.0000	No	S
18	19	0	3	Vander Planke, Mrs. Julius (Emelia Maria Vande...	female	31.000000	1	0	345763	18.0000	No	S
19	20	1	3	Masselmani, Mrs. Fatima	female	29.518205	0	0	2649	7.2250	No	C
20	21	0	2	Fynney, Mr. Joseph J	male	35.000000	0	0	239865	26.0000	No	S
21	22	1	2	Beesley, Mr. Lawrence	male	34.000000	0	0	248698	13.0000	Yes	S
22	23	1	3	McGowan, Miss. Anna "Annie"	female	15.000000	0	0	330923	8.0292	No	Q
23	24	1	1	Sloper, Mr. William Thompson	male	28.000000	0	0	113788	35.5000	Yes	S
24	25	0	3	Palsson, Miss. Torborg Danira	female	8.000000	3	1	349909	21.0750	No	S
25	26	1	3	Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...	female	38.000000	1	5	347077	31.3875	No	S
26	27	0	3	Emir, Mr. Farred Chehab	male	29.518205	0	0	2631	7.2250	No	C
27	28	0	1	Fortune, Mr. Charles Alexander	male	19.000000	3	2	19950	263.0000	Yes	S
28	29	1	3	O'Dwyer, Miss. Ellen "Nellie"	female	22.380113	0	0	330959	7.8792	No	Q
29	30	0	3	Todoroff, Mr. Lalio	male	27.947206	0	0	349216	7.8958	No	S
...	...	...	...	...	...	...	...	...	...	...	...	...
861	862	0	2	Giles, Mr. Frederick Edward	male	21.000000	1	0	28134	11.5000	No	S
862	863	1	1	Swift, Mrs. Frederick Joel (Margaret Welles Ba...	female	48.000000	0	0	17466	25.9292	Yes	S
863	864	0	3	Sage, Miss. Dorothy Edith "Dolly"	female	10.869867	8	2	CA. 2343	69.5500	No	S
864	865	0	2	Gill, Mr. John William	male	24.000000	0	0	233866	13.0000	No	S
865	866	1	2	Bystrom, Mrs. (Karolina)	female	42.000000	0	0	236852	13.0000	No	S
866	867	1	2	Duran y More, Miss. Asuncion	female	27.000000	1	0	SC/PARIS 2149	13.8583	No	C
867	868	0	1	Roebling, Mr. Washington Augustus II	male	31.000000	0	0	PC 17590	50.4958	Yes	S
868	869	0	3	van Melkebeke, Mr. Philemon	male	25.977889	0	0	345777	9.5000	No	S
869	870	1	3	Johnson, Master. Harold Theodor	male	4.000000	1	1	347742	11.1333	No	S
870	871	0	3	Balkic, Mr. Cerin	male	26.000000	0	0	349248	7.8958	No	S
871	872	1	1	Beckwith, Mrs. Richard Leonard (Sallie Monypeny)	female	47.000000	1	1	11751	52.5542	Yes	S
872	873	0	1	Carlsson, Mr. Frans Olof	male	33.000000	0	0	695	5.0000	Yes	S
873	874	0	3	Vander Cruyssen, Mr. Victor	male	47.000000	0	0	345765	9.0000	No	S
874	875	1	2	Abelson, Mrs. Samuel (Hannah Wizosky)	female	28.000000	1	0	P/PP 3381	24.0000	No	C
875	876	1	3	Najib, Miss. Adele Kiamie "Jane"	female	15.000000	0	0	2667	7.2250	No	C
876	877	0	3	Gustafsson, Mr. Alfred Ossian	male	20.000000	0	0	7534	9.8458	No	S
877	878	0	3	Petroff, Mr. Nedelio	male	19.000000	0	0	349212	7.8958	No	S
878	879	0	3	Laleff, Mr. Kristo	male	27.947206	0	0	349217	7.8958	No	S
879	880	1	1	Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)	female	56.000000	0	1	11767	83.1583	Yes	C
880	881	1	2	Shelley, Mrs. William (Imanita Parrish Hall)	female	25.000000	0	1	230433	26.0000	No	S
881	882	0	3	Markun, Mr. Johann	male	33.000000	0	0	349257	7.8958	No	S
882	883	0	3	Dahlberg, Miss. Gerda Ulrika	female	22.000000	0	0	7552	10.5167	No	S
883	884	0	2	Banfield, Mr. Frederick James	male	28.000000	0	0	C.A./SOTON 34068	10.5000	No	S
884	885	0	3	Sutehall, Mr. Henry Jr	male	25.000000	0	0	SOTON/OQ 392076	7.0500	No	S
885	886	0	3	Rice, Mrs. William (Margaret Norton)	female	39.000000	0	5	382652	29.1250	No	Q
886	887	0	2	Montvila, Rev. Juozas	male	27.000000	0	0	211536	13.0000	No	S
887	888	1	1	Graham, Miss. Margaret Edith	female	19.000000	0	0	112053	30.0000	Yes	S
888	889	0	3	Johnston, Miss. Catherine Helen "Carrie"	female	16.127950	1	2	W./C. 6607	23.4500	No	S
889	890	1	1	Behr, Mr. Karl Howell	male	26.000000	0	0	111369	30.0000	Yes	C
890	891	0	3	Dooley, Mr. Patrick	male	32.000000	0	0	370376	7.7500	No	Q

	PassengerId	Survived	Age	SibSp	Parch	Fare	Cabin_No	Cabin_Yes	Embarked_C	Embarked_Q	Embarked_S	Sex_female	Sex_male	Pclass_1	Pclass_2	Pclass_3	Age_scaled	Fare_scaled
0	1	0	22.000000	1	0	7.2500	1	0	0	0	1	0	1	0	0	1	-0.561363	-0.502445
1	2	1	38.000000	1	0	71.2833	0	1	1	0	0	1	0	1	0	0	0.613182	0.786845
2	3	1	26.000000	0	0	7.9250	1	0	0	0	1	1	0	0	0	1	-0.267727	-0.488854
3	4	1	35.000000	1	0	53.1000	0	1	0	0	1	1	0	1	0	0	0.392955	0.420730
4	5	0	35.000000	0	0	8.0500	1	0	0	0	1	0	1	0	0	1	0.392955	-0.486337
5	6	0	23.828953	0	0	8.4583	1	0	0	1	0	0	1	0	0	1	-0.427102	-0.478116
6	7	0	54.000000	0	0	51.8625	0	1	0	0	1	0	1	1	0	0	1.787727	0.395814
7	8	0	2.000000	3	1	21.0750	1	0	0	0	1	0	1	0	0	1	-2.029545	-0.224083
8	9	1	27.000000	0	2	11.1333	1	0	0	0	1	1	0	0	0	1	-0.194318	-0.424256
9	10	1	14.000000	1	0	30.0708	1	0	1	0	0	1	0	0	1	0	-1.148636	-0.042956
10	11	1	4.000000	1	1	16.7000	0	1	0	0	1	1	0	0	0	1	-1.882726	-0.312172
11	12	1	58.000000	0	0	26.5500	0	1	0	0	1	1	0	1	0	0	2.081363	-0.113846
12	13	0	20.000000	0	0	8.0500	1	0	0	0	1	0	1	0	0	1	-0.708181	-0.486337
13	14	0	39.000000	1	5	31.2750	1	0	0	0	1	0	1	0	0	1	0.686591	-0.018709
14	15	0	14.000000	0	0	7.8542	1	0	0	0	1	1	0	0	0	1	-1.148636	-0.490280
15	16	1	55.000000	0	0	16.0000	1	0	0	0	1	1	0	0	1	0	1.861136	-0.326267
16	17	0	2.000000	4	1	29.1250	1	0	0	1	0	0	1	0	0	1	-2.029545	-0.061999
17	18	1	32.066493	0	0	13.0000	1	0	0	0	1	0	1	0	1	0	0.177609	-0.386671
18	19	0	31.000000	1	0	18.0000	1	0	0	0	1	1	0	0	0	1	0.099318	-0.285997
19	20	1	29.518205	0	0	7.2250	1	0	1	0	0	1	0	0	0	1	-0.009459	-0.502949
20	21	0	35.000000	0	0	26.0000	1	0	0	0	1	0	1	0	1	0	0.392955	-0.124920
21	22	1	34.000000	0	0	13.0000	0	1	0	0	1	0	1	0	1	0	0.319546	-0.386671
22	23	1	15.000000	0	0	8.0292	1	0	0	1	0	1	0	0	0	1	-1.075227	-0.486756
23	24	1	28.000000	0	0	35.5000	0	1	0	0	1	0	1	1	0	0	-0.120909	0.066360
24	25	0	8.000000	3	1	21.0750	1	0	0	0	1	1	0	0	0	1	-1.589090	-0.224083
25	26	1	38.000000	1	5	31.3875	1	0	0	0	1	1	0	0	0	1	0.613182	-0.016444
26	27	0	29.518205	0	0	7.2250	1	0	1	0	0	0	1	0	0	1	-0.009459	-0.502949
27	28	0	19.000000	3	2	263.0000	0	1	0	0	1	0	1	1	0	0	-0.781590	4.647001
28	29	1	22.380113	0	0	7.8792	1	0	0	1	0	1	0	0	0	1	-0.533459	-0.489776
29	30	0	27.947206	0	0	7.8958	1	0	0	0	1	0	1	0	0	1	-0.124784	-0.489442
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
861	862	0	21.000000	1	0	11.5000	1	0	0	0	1	0	1	0	1	0	-0.634772	-0.416873
862	863	1	48.000000	0	0	25.9292	0	1	0	0	1	1	0	1	0	0	1.347272	-0.126345
863	864	0	10.869867	8	2	69.5500	1	0	0	0	1	1	0	0	0	1	-1.378416	0.751946
864	865	0	24.000000	0	0	13.0000	1	0	0	0	1	0	1	0	1	0	-0.414545	-0.386671
865	866	1	42.000000	0	0	13.0000	1	0	0	0	1	1	0	0	1	0	0.906818	-0.386671
866	867	1	27.000000	1	0	13.8583	1	0	1	0	0	1	0	0	1	0	-0.194318	-0.369389
867	868	0	31.000000	0	0	50.4958	0	1	0	0	1	0	1	1	0	0	0.099318	0.368295
868	869	0	25.977889	0	0	9.5000	1	0	0	0	1	0	1	0	0	1	-0.269350	-0.457142
869	870	1	4.000000	1	1	11.1333	1	0	0	0	1	0	1	0	0	1	-1.882726	-0.424256
870	871	0	26.000000	0	0	7.8958	1	0	0	0	1	0	1	0	0	1	-0.267727	-0.489442
871	872	1	47.000000	1	1	52.5542	0	1	0	0	1	1	0	1	0	0	1.273863	0.409741
872	873	0	33.000000	0	0	5.0000	0	1	0	0	1	0	1	1	0	0	0.246136	-0.547748
873	874	0	47.000000	0	0	9.0000	1	0	0	0	1	0	1	0	0	1	1.273863	-0.467209
874	875	1	28.000000	1	0	24.0000	1	0	1	0	0	1	0	0	1	0	-0.120909	-0.165189
875	876	1	15.000000	0	0	7.2250	1	0	1	0	0	1	0	0	0	1	-1.075227	-0.502949
876	877	0	20.000000	0	0	9.8458	1	0	0	0	1	0	1	0	0	1	-0.708181	-0.450180
877	878	0	19.000000	0	0	7.8958	1	0	0	0	1	0	1	0	0	1	-0.781590	-0.489442
878	879	0	27.947206	0	0	7.8958	1	0	0	0	1	0	1	0	0	1	-0.124784	-0.489442
879	880	1	56.000000	0	1	83.1583	0	1	1	0	0	1	0	1	0	0	1.934545	1.025945
880	881	1	25.000000	0	1	26.0000	1	0	0	0	1	1	0	0	1	0	-0.341136	-0.124920
881	882	0	33.000000	0	0	7.8958	1	0	0	0	1	0	1	0	0	1	0.246136	-0.489442
882	883	0	22.000000	0	0	10.5167	1	0	0	0	1	1	0	0	0	1	-0.561363	-0.436671
883	884	0	28.000000	0	0	10.5000	1	0	0	0	1	0	1	0	1	0	-0.120909	-0.437007
884	885	0	25.000000	0	0	7.0500	1	0	0	0	1	0	1	0	0	1	-0.341136	-0.506472
885	886	0	39.000000	0	5	29.1250	1	0	0	1	0	1	0	0	0	1	0.686591	-0.061999
886	887	0	27.000000	0	0	13.0000	1	0	0	0	1	0	1	0	1	0	-0.194318	-0.386671
887	888	1	19.000000	0	0	30.0000	0	1	0	0	1	1	0	1	0	0	-0.781590	-0.044381
888	889	0	16.127950	1	2	23.4500	1	0	0	0	1	1	0	0	0	1	-0.992425	-0.176263
889	890	1	26.000000	0	0	30.0000	0	1	1	0	0	0	1	1	0	0	-0.267727	-0.044381
890	891	0	32.000000	0	0	7.7500	1	0	0	1	0	0	1	0	0	1	0.172727	-0.492378

	PassengerId	Age	SibSp	Parch	Fare	Cabin_No	Cabin_Yes	Embarked_C	Embarked_Q	Embarked_S	Sex_female	Sex_male	Pclass_1	Pclass_2	Pclass_3	Age_scaled	Fare_scaled
0	892	34.500000	0	0	7.8292	1	0	0	1	0	0	1	0	0	1	0.307535	-0.496637
1	893	47.000000	1	0	7.0000	1	0	0	0	1	1	0	0	0	1	1.256230	-0.511497
2	894	62.000000	0	0	9.6875	1	0	0	1	0	0	1	0	1	0	2.394665	-0.463335
3	895	27.000000	0	0	8.6625	1	0	0	0	1	0	1	0	0	1	-0.261683	-0.481704
4	896	22.000000	1	1	12.2875	1	0	0	0	1	1	0	0	0	1	-0.641161	-0.416740
5	897	14.000000	0	0	9.2250	1	0	0	0	1	0	1	0	0	1	-1.248326	-0.471623
6	898	30.000000	0	0	7.6292	1	0	0	1	0	1	0	0	0	1	-0.033996	-0.500221
7	899	26.000000	1	1	29.0000	1	0	0	0	1	0	1	0	1	0	-0.337578	-0.117238
8	900	18.000000	0	0	7.2292	1	0	1	0	0	1	0	0	0	1	-0.944743	-0.507390
9	901	21.000000	2	0	24.1500	1	0	0	0	1	0	1	0	0	1	-0.717056	-0.204154
10	902	27.947206	0	0	7.8958	1	0	0	0	1	0	1	0	0	1	-0.189794	-0.495444
11	903	46.000000	0	0	26.0000	1	0	0	0	1	0	1	1	0	0	1.180334	-0.171000
12	904	23.000000	1	0	82.2667	0	1	0	0	1	1	0	1	0	0	-0.565265	0.837349
13	905	63.000000	1	0	26.0000	1	0	0	0	1	0	1	0	1	0	2.470560	-0.171000
14	906	47.000000	1	0	61.1750	0	1	0	0	1	1	0	1	0	0	1.256230	0.459367
15	907	24.000000	1	0	27.7208	1	0	1	0	0	1	0	0	1	0	-0.489370	-0.140162
16	908	35.000000	0	0	12.3500	1	0	0	1	0	0	1	0	1	0	0.345482	-0.415620
17	909	21.000000	0	0	7.2250	1	0	1	0	0	0	1	0	0	1	-0.717056	-0.507465
18	910	27.000000	1	0	7.9250	1	0	0	0	1	1	0	0	0	1	-0.261683	-0.494920
19	911	45.000000	0	0	7.2250	1	0	1	0	0	1	0	0	0	1	1.104439	-0.507465
20	912	55.000000	1	0	59.4000	1	0	1	0	0	0	1	1	0	0	1.863395	0.427557
21	913	9.000000	0	1	3.1708	1	0	0	0	1	0	1	0	0	1	-1.627804	-0.580120
22	914	52.314311	0	0	31.6833	1	0	0	0	1	1	0	1	0	0	1.659563	-0.069151
23	915	21.000000	0	1	61.3792	1	0	1	0	0	0	1	1	0	0	-0.717056	0.463026
24	916	48.000000	1	3	262.3750	0	1	1	0	0	1	0	1	0	0	1.332126	4.065049
25	917	50.000000	1	0	14.5000	1	0	0	0	1	0	1	0	0	1	1.483917	-0.377090
26	918	22.000000	0	1	61.9792	0	1	1	0	0	1	0	1	0	0	-0.641161	0.473779
27	919	22.500000	0	0	7.2250	1	0	1	0	0	0	1	0	0	1	-0.603213	-0.507465
28	920	41.000000	0	0	30.5000	0	1	0	0	1	0	1	1	0	0	0.800856	-0.090356
29	921	23.458621	2	0	21.6792	1	0	1	0	0	0	1	0	0	1	-0.530458	-0.248433
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
388	1280	21.000000	0	0	7.7500	1	0	0	1	0	0	1	0	0	1	-0.717056	-0.498056
389	1281	6.000000	3	1	21.0750	1	0	0	0	1	0	1	0	0	1	-1.855491	-0.259261
390	1282	23.000000	0	0	93.5000	0	1	0	0	1	0	1	1	0	0	-0.565265	1.038659
391	1283	51.000000	0	1	39.4000	0	1	0	0	1	1	0	1	0	0	1.559813	0.069140
392	1284	13.000000	0	2	20.2500	1	0	0	0	1	0	1	0	0	1	-1.324222	-0.274045
393	1285	47.000000	0	0	10.5000	1	0	0	0	1	0	1	0	1	0	1.256230	-0.448774
394	1286	29.000000	3	1	22.0250	1	0	0	0	1	0	1	0	0	1	-0.109891	-0.242236
395	1287	18.000000	1	0	60.0000	0	1	0	0	1	1	0	1	0	0	-0.944743	0.438310
396	1288	24.000000	0	0	7.2500	1	0	0	1	0	0	1	0	0	1	-0.489370	-0.507017
397	1289	48.000000	1	1	79.2000	0	1	1	0	0	1	0	1	0	0	1.332126	0.782391
398	1290	22.000000	0	0	7.7750	1	0	0	0	1	0	1	0	0	1	-0.641161	-0.497608
399	1291	31.000000	0	0	7.7333	1	0	0	1	0	0	1	0	0	1	0.041900	-0.498356
400	1292	30.000000	0	0	164.8667	0	1	0	0	1	1	0	1	0	0	-0.033996	2.317614
401	1293	38.000000	1	0	21.0000	1	0	0	0	1	0	1	0	1	0	0.573169	-0.260605
402	1294	22.000000	0	1	59.4000	1	0	1	0	0	1	0	1	0	0	-0.641161	0.427557
403	1295	17.000000	0	0	47.1000	1	0	0	0	1	0	1	1	0	0	-1.020639	0.207130
404	1296	43.000000	1	0	27.7208	0	1	1	0	0	0	1	1	0	0	0.952648	-0.140162
405	1297	20.000000	0	0	13.8625	0	1	1	0	0	0	1	0	1	0	-0.792952	-0.388515
406	1298	23.000000	1	0	10.5000	1	0	0	0	1	0	1	0	1	0	-0.565265	-0.448774
407	1299	50.000000	1	1	211.5000	0	1	1	0	0	0	1	1	0	0	1.483917	3.153324
408	1300	19.895581	0	0	7.7208	1	0	0	1	0	1	0	0	0	1	-0.800877	-0.498580
409	1301	3.000000	1	1	13.7750	1	0	0	0	1	1	0	0	0	1	-2.083178	-0.390083
410	1302	35.295824	0	0	7.7500	1	0	0	1	0	1	0	0	0	1	0.367934	-0.498056
411	1303	37.000000	1	0	90.0000	0	1	0	1	0	1	0	1	0	0	0.497274	0.975936
412	1304	28.000000	0	0	7.7750	1	0	0	0	1	1	0	0	0	1	-0.185787	-0.497608
413	1305	30.705727	0	0	8.0500	1	0	0	0	1	0	1	0	0	1	0.019566	-0.492680
414	1306	39.000000	0	0	108.9000	0	1	1	0	0	1	0	1	0	0	0.649065	1.314641
415	1307	38.500000	0	0	7.2500	1	0	0	0	1	0	1	0	0	1	0.611117	-0.507017
416	1308	30.705727	0	0	8.0500	1	0	0	0	1	0	1	0	0	1	0.019566	-0.492680
417	1309	25.793502	1	1	22.3583	1	0	1	0	0	0	1	0	0	1	-0.353251	-0.236263

	coef	columns
0	[-0.34422839921]	SibSp
1	[-0.104931034051]	Parch
2	[0.0]	Cabin_No
3	[0.902141342399]	Cabin_Yes
4	[0.0]	Embarked_C
5	[0.0]	Embarked_Q
6	[-0.417261046864]	Embarked_S
7	[1.95657542024]	Sex_female
8	[-0.677420626893]	Sex_male
9	[0.341143936557]	Pclass_1
10	[0.0]	Pclass_2
11	[-1.1941395756]	Pclass_3
12	[-0.523782091128]	Age_scaled
13	[0.0844324435407]	Fare_scaled