notebook.community

Edit and run



In [1]:

    
# Imports

# pandas
import pandas as pd
from pandas import Series,DataFrame

# numpy, matplotlib, seaborn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# machine learning
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB



In [2]:

    
# get titanic & test csv files as a DataFrame
titanic_df = pd.read_csv("./input/train.csv")
test_df    = pd.read_csv("./input/test.csv")

# preview the data
titanic_df.head()









    Out[2]:







  
    
      
      PassengerId
      Survived
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
    
  
  
    
      0
      1
      0
      3
      Braund, Mr. Owen Harris
      male
      22.0
      1
      0
      A/5 21171
      7.2500
      NaN
      S
    
    
      1
      2
      1
      1
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      female
      38.0
      1
      0
      PC 17599
      71.2833
      C85
      C
    
    
      2
      3
      1
      3
      Heikkinen, Miss. Laina
      female
      26.0
      0
      0
      STON/O2. 3101282
      7.9250
      NaN
      S
    
    
      3
      4
      1
      1
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      female
      35.0
      1
      0
      113803
      53.1000
      C123
      S
    
    
      4
      5
      0
      3
      Allen, Mr. William Henry
      male
      35.0
      0
      0
      373450
      8.0500
      NaN
      S



In [3]:

    
titanic_df.info()
print("----------------------------")
test_df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
----------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
PassengerId    418 non-null int64
Pclass         418 non-null int64
Name           418 non-null object
Sex            418 non-null object
Age            332 non-null float64
SibSp          418 non-null int64
Parch          418 non-null int64
Ticket         418 non-null object
Fare           417 non-null float64
Cabin          91 non-null object
Embarked       418 non-null object
dtypes: float64(2), int64(4), object(5)
memory usage: 36.0+ KB



In [4]:

    
# drop unnecessary columns, these columns won't be useful in analysis and prediction
titanic_df = titanic_df.drop(['PassengerId','Name','Ticket'], axis=1)
test_df    = test_df.drop(['Name','Ticket'], axis=1)



In [5]:

    
# Embarked

# only in titanic_df, fill the two missing values with the most occurred value, which is "S".
titanic_df["Embarked"] = titanic_df["Embarked"].fillna("S")

# plot
sns.factorplot('Embarked','Survived', data=titanic_df,size=4,aspect=3)

fig, (axis1,axis2,axis3) = plt.subplots(1,3,figsize=(15,5))

# sns.factorplot('Embarked',data=titanic_df,kind='count',order=['S','C','Q'],ax=axis1)
# sns.factorplot('Survived',hue="Embarked",data=titanic_df,kind='count',order=[1,0],ax=axis2)
sns.countplot(x='Embarked', data=titanic_df, ax=axis1)
sns.countplot(x='Survived', hue="Embarked", data=titanic_df, order=[1,0], ax=axis2)

# group by embarked, and get the mean for survived passengers for each value in Embarked
embark_perc = titanic_df[["Embarked", "Survived"]].groupby(['Embarked'],as_index=False).mean()
sns.barplot(x='Embarked', y='Survived', data=embark_perc,order=['S','C','Q'],ax=axis3)

# Either to consider Embarked column in predictions,
# and remove "S" dummy variable, 
# and leave "C" & "Q", since they seem to have a good rate for Survival.

# OR, don't create dummy variables for Embarked column, just drop it, 
# because logically, Embarked doesn't seem to be useful in prediction.

embark_dummies_titanic  = pd.get_dummies(titanic_df['Embarked'])
embark_dummies_titanic.drop(['S'], axis=1, inplace=True)

embark_dummies_test  = pd.get_dummies(test_df['Embarked'])
embark_dummies_test.drop(['S'], axis=1, inplace=True)

titanic_df = titanic_df.join(embark_dummies_titanic)
test_df    = test_df.join(embark_dummies_test)

titanic_df.drop(['Embarked'], axis=1,inplace=True)
test_df.drop(['Embarked'], axis=1,inplace=True)



In [6]:

    
# Fare

# only for test_df, since there is a missing "Fare" values
test_df["Fare"].fillna(test_df["Fare"].median(), inplace=True)

# convert from float to int
titanic_df['Fare'] = titanic_df['Fare'].astype(int)
test_df['Fare']    = test_df['Fare'].astype(int)

# get fare for survived & didn't survive passengers 
fare_not_survived = titanic_df["Fare"][titanic_df["Survived"] == 0]
fare_survived     = titanic_df["Fare"][titanic_df["Survived"] == 1]

# get average and std for fare of survived/not survived passengers
avgerage_fare = DataFrame([fare_not_survived.mean(), fare_survived.mean()])
std_fare      = DataFrame([fare_not_survived.std(), fare_survived.std()])

# plot
titanic_df['Fare'].plot(kind='hist', figsize=(15,3),bins=100, xlim=(0,50))

avgerage_fare.index.names = std_fare.index.names = ["Survived"]
avgerage_fare.plot(yerr=std_fare,kind='bar',legend=False)









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x1e5f1a7dfd0>



In [7]:

    
# Age 

fig, (axis1,axis2) = plt.subplots(1,2,figsize=(15,4))
axis1.set_title('Original Age values - Titanic')
axis2.set_title('New Age values - Titanic')

# axis3.set_title('Original Age values - Test')
# axis4.set_title('New Age values - Test')

# get average, std, and number of NaN values in titanic_df
average_age_titanic   = titanic_df["Age"].mean()
std_age_titanic       = titanic_df["Age"].std()
count_nan_age_titanic = titanic_df["Age"].isnull().sum()

# get average, std, and number of NaN values in test_df
average_age_test   = test_df["Age"].mean()
std_age_test       = test_df["Age"].std()
count_nan_age_test = test_df["Age"].isnull().sum()

# generate random numbers between (mean - std) & (mean + std)
rand_1 = np.random.randint(average_age_titanic - std_age_titanic, average_age_titanic + std_age_titanic, size = count_nan_age_titanic)
rand_2 = np.random.randint(average_age_test - std_age_test, average_age_test + std_age_test, size = count_nan_age_test)

# plot original Age values
# NOTE: drop all null values, and convert to int
titanic_df['Age'].dropna().astype(int).hist(bins=70, ax=axis1)
# test_df['Age'].dropna().astype(int).hist(bins=70, ax=axis1)

# fill NaN values in Age column with random values generated
titanic_df["Age"][np.isnan(titanic_df["Age"])] = rand_1
test_df["Age"][np.isnan(test_df["Age"])] = rand_2

# convert from float to int
titanic_df['Age'] = titanic_df['Age'].astype(int)
test_df['Age']    = test_df['Age'].astype(int)
        
# plot new Age Values
titanic_df['Age'].hist(bins=70, ax=axis2)
# test_df['Age'].hist(bins=70, ax=axis4)









    



C:\Users\lvarr\AppData\Local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:30: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
C:\Users\lvarr\AppData\Local\conda\conda\envs\tensorflow\lib\site-packages\ipykernel_launcher.py:31: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy






    Out[7]:





<matplotlib.axes._subplots.AxesSubplot at 0x1e5f1a286a0>



In [8]:

    
# .... continue with plot Age column

# peaks for survived/not survived passengers by their age
facet = sns.FacetGrid(titanic_df, hue="Survived",aspect=4)
facet.map(sns.kdeplot,'Age',shade= True)
facet.set(xlim=(0, titanic_df['Age'].max()))
facet.add_legend()

# average survived passengers by age
fig, axis1 = plt.subplots(1,1,figsize=(18,4))
average_age = titanic_df[["Age", "Survived"]].groupby(['Age'],as_index=False).mean()
sns.barplot(x='Age', y='Survived', data=average_age)









    Out[8]:





<matplotlib.axes._subplots.AxesSubplot at 0x1e5f1b56f60>



In [9]:

    
# Cabin
# It has a lot of NaN values, so it won't cause a remarkable impact on prediction
titanic_df.drop("Cabin",axis=1,inplace=True)
test_df.drop("Cabin",axis=1,inplace=True)



In [10]:

    
# Family

# Instead of having two columns Parch & SibSp, 
# we can have only one column represent if the passenger had any family member aboard or not,
# Meaning, if having any family member(whether parent, brother, ...etc) will increase chances of Survival or not.
titanic_df['Family'] =  titanic_df["Parch"] + titanic_df["SibSp"]
titanic_df['Family'].loc[titanic_df['Family'] > 0] = 1
titanic_df['Family'].loc[titanic_df['Family'] == 0] = 0

test_df['Family'] =  test_df["Parch"] + test_df["SibSp"]
test_df['Family'].loc[test_df['Family'] > 0] = 1
test_df['Family'].loc[test_df['Family'] == 0] = 0

# drop Parch & SibSp
titanic_df = titanic_df.drop(['SibSp','Parch'], axis=1)
test_df    = test_df.drop(['SibSp','Parch'], axis=1)

# plot
fig, (axis1,axis2) = plt.subplots(1,2,sharex=True,figsize=(10,5))

# sns.factorplot('Family',data=titanic_df,kind='count',ax=axis1)
sns.countplot(x='Family', data=titanic_df, order=[1,0], ax=axis1)

# average of survived for those who had/didn't have any family member
family_perc = titanic_df[["Family", "Survived"]].groupby(['Family'],as_index=False).mean()
sns.barplot(x='Family', y='Survived', data=family_perc, order=[1,0], ax=axis2)

axis1.set_xticklabels(["With Family","Alone"], rotation=0)









    



C:\Users\lvarr\AppData\Local\conda\conda\envs\tensorflow\lib\site-packages\pandas\core\indexing.py:179: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)






    Out[10]:





[<matplotlib.text.Text at 0x1e5f1ff8358>,
 <matplotlib.text.Text at 0x1e5f1708240>]



In [11]:

    
# Sex

# As we see, children(age < ~16) on aboard seem to have a high chances for Survival.
# So, we can classify passengers as males, females, and child
def get_person(passenger):
    age,sex = passenger
    return 'child' if age < 16 else sex
    
titanic_df['Person'] = titanic_df[['Age','Sex']].apply(get_person,axis=1)
test_df['Person']    = test_df[['Age','Sex']].apply(get_person,axis=1)

# No need to use Sex column since we created Person column
titanic_df.drop(['Sex'],axis=1,inplace=True)
test_df.drop(['Sex'],axis=1,inplace=True)

# create dummy variables for Person column, & drop Male as it has the lowest average of survived passengers
person_dummies_titanic  = pd.get_dummies(titanic_df['Person'])
person_dummies_titanic.columns = ['Child','Female','Male']
person_dummies_titanic.drop(['Male'], axis=1, inplace=True)

person_dummies_test  = pd.get_dummies(test_df['Person'])
person_dummies_test.columns = ['Child','Female','Male']
person_dummies_test.drop(['Male'], axis=1, inplace=True)

titanic_df = titanic_df.join(person_dummies_titanic)
test_df    = test_df.join(person_dummies_test)

fig, (axis1,axis2) = plt.subplots(1,2,figsize=(10,5))

# sns.factorplot('Person',data=titanic_df,kind='count',ax=axis1)
sns.countplot(x='Person', data=titanic_df, ax=axis1)

# average of survived for each Person(male, female, or child)
person_perc = titanic_df[["Person", "Survived"]].groupby(['Person'],as_index=False).mean()
sns.barplot(x='Person', y='Survived', data=person_perc, ax=axis2, order=['male','female','child'])

titanic_df.drop(['Person'],axis=1,inplace=True)
test_df.drop(['Person'],axis=1,inplace=True)



In [12]:

    
# Pclass

# sns.factorplot('Pclass',data=titanic_df,kind='count',order=[1,2,3])
sns.factorplot('Pclass','Survived',order=[1,2,3], data=titanic_df,size=5)

# create dummy variables for Pclass column, & drop 3rd class as it has the lowest average of survived passengers
pclass_dummies_titanic  = pd.get_dummies(titanic_df['Pclass'])
pclass_dummies_titanic.columns = ['Class_1','Class_2','Class_3']
pclass_dummies_titanic.drop(['Class_3'], axis=1, inplace=True)

pclass_dummies_test  = pd.get_dummies(test_df['Pclass'])
pclass_dummies_test.columns = ['Class_1','Class_2','Class_3']
pclass_dummies_test.drop(['Class_3'], axis=1, inplace=True)

titanic_df.drop(['Pclass'],axis=1,inplace=True)
test_df.drop(['Pclass'],axis=1,inplace=True)

titanic_df = titanic_df.join(pclass_dummies_titanic)
test_df    = test_df.join(pclass_dummies_test)



In [13]:

    
# define training and testing sets

X_train = titanic_df.drop("Survived",axis=1)
Y_train = titanic_df["Survived"]
X_test  = test_df.drop("PassengerId",axis=1).copy()



In [14]:

    
# Logistic Regression

logreg = LogisticRegression()

logreg.fit(X_train, Y_train)

Y_pred = logreg.predict(X_test)

logreg.score(X_train, Y_train)









    Out[14]:





0.80920314253647585



In [15]:

    
# Support Vector Machines

# svc = SVC()

# svc.fit(X_train, Y_train)

# Y_pred = svc.predict(X_test)

# svc.score(X_train, Y_train)



In [16]:

    
# Random Forests

random_forest = RandomForestClassifier(n_estimators=100)

random_forest.fit(X_train, Y_train)

Y_pred = random_forest.predict(X_test)

random_forest.score(X_train, Y_train)









    Out[16]:





0.96632996632996637



In [17]:

    
# knn = KNeighborsClassifier(n_neighbors = 3)

# knn.fit(X_train, Y_train)

# Y_pred = knn.predict(X_test)

# knn.score(X_train, Y_train)



In [18]:

    
# Gaussian Naive Bayes

# gaussian = GaussianNB()

# gaussian.fit(X_train, Y_train)

# Y_pred = gaussian.predict(X_test)

# gaussian.score(X_train, Y_train)



In [19]:

    
# get Correlation Coefficient for each feature using Logistic Regression
coeff_df = DataFrame(titanic_df.columns.delete(0))
coeff_df.columns = ['Features']
coeff_df["Coefficient Estimate"] = pd.Series(logreg.coef_[0])

# preview
coeff_df









    Out[19]:







  
    
      
      Features
      Coefficient Estimate
    
  
  
    
      0
      Age
      -0.022065
    
    
      1
      Fare
      0.001030
    
    
      2
      C
      0.596758
    
    
      3
      Q
      0.317246
    
    
      4
      Family
      -0.207585
    
    
      5
      Child
      1.752809
    
    
      6
      Female
      2.743579
    
    
      7
      Class_1
      1.957364
    
    
      8
      Class_2
      1.116120



In [21]:

    
submission = pd.DataFrame({
        "PassengerId": test_df["PassengerId"],
        "Survived": Y_pred
    })
submission.to_csv('titanic.csv', index=False)



In [22]:

    
titanic_df









    Out[22]:







  
    
      
      Survived
      Age
      Fare
      C
      Q
      Family
      Child
      Female
      Class_1
      Class_2
    
  
  
    
      0
      0
      22
      7
      0
      0
      1
      0
      0
      0
      0
    
    
      1
      1
      38
      71
      1
      0
      1
      0
      1
      1
      0
    
    
      2
      1
      26
      7
      0
      0
      0
      0
      1
      0
      0
    
    
      3
      1
      35
      53
      0
      0
      1
      0
      1
      1
      0
    
    
      4
      0
      35
      8
      0
      0
      0
      0
      0
      0
      0
    
    
      5
      0
      42
      8
      0
      1
      0
      0
      0
      0
      0
    
    
      6
      0
      54
      51
      0
      0
      0
      0
      0
      1
      0
    
    
      7
      0
      2
      21
      0
      0
      1
      1
      0
      0
      0
    
    
      8
      1
      27
      11
      0
      0
      1
      0
      1
      0
      0
    
    
      9
      1
      14
      30
      1
      0
      1
      1
      0
      0
      1
    
    
      10
      1
      4
      16
      0
      0
      1
      1
      0
      0
      0
    
    
      11
      1
      58
      26
      0
      0
      0
      0
      1
      1
      0
    
    
      12
      0
      20
      8
      0
      0
      0
      0
      0
      0
      0
    
    
      13
      0
      39
      31
      0
      0
      1
      0
      0
      0
      0
    
    
      14
      0
      14
      7
      0
      0
      0
      1
      0
      0
      0
    
    
      15
      1
      55
      16
      0
      0
      0
      0
      1
      0
      1
    
    
      16
      0
      2
      29
      0
      1
      1
      1
      0
      0
      0
    
    
      17
      1
      26
      13
      0
      0
      0
      0
      0
      0
      1
    
    
      18
      0
      31
      18
      0
      0
      1
      0
      1
      0
      0
    
    
      19
      1
      34
      7
      1
      0
      0
      0
      1
      0
      0
    
    
      20
      0
      35
      26
      0
      0
      0
      0
      0
      0
      1
    
    
      21
      1
      34
      13
      0
      0
      0
      0
      0
      0
      1
    
    
      22
      1
      15
      8
      0
      1
      0
      1
      0
      0
      0
    
    
      23
      1
      28
      35
      0
      0
      0
      0
      0
      1
      0
    
    
      24
      0
      8
      21
      0
      0
      1
      1
      0
      0
      0
    
    
      25
      1
      38
      31
      0
      0
      1
      0
      1
      0
      0
    
    
      26
      0
      39
      7
      1
      0
      0
      0
      0
      0
      0
    
    
      27
      0
      19
      263
      0
      0
      1
      0
      0
      1
      0
    
    
      28
      1
      42
      7
      0
      1
      0
      0
      1
      0
      0
    
    
      29
      0
      21
      7
      0
      0
      0
      0
      0
      0
      0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      861
      0
      21
      11
      0
      0
      1
      0
      0
      0
      1
    
    
      862
      1
      48
      25
      0
      0
      0
      0
      1
      1
      0
    
    
      863
      0
      29
      69
      0
      0
      1
      0
      1
      0
      0
    
    
      864
      0
      24
      13
      0
      0
      0
      0
      0
      0
      1
    
    
      865
      1
      42
      13
      0
      0
      0
      0
      1
      0
      1
    
    
      866
      1
      27
      13
      1
      0
      1
      0
      1
      0
      1
    
    
      867
      0
      31
      50
      0
      0
      0
      0
      0
      1
      0
    
    
      868
      0
      34
      9
      0
      0
      0
      0
      0
      0
      0
    
    
      869
      1
      4
      11
      0
      0
      1
      1
      0
      0
      0
    
    
      870
      0
      26
      7
      0
      0
      0
      0
      0
      0
      0
    
    
      871
      1
      47
      52
      0
      0
      1
      0
      1
      1
      0
    
    
      872
      0
      33
      5
      0
      0
      0
      0
      0
      1
      0
    
    
      873
      0
      47
      9
      0
      0
      0
      0
      0
      0
      0
    
    
      874
      1
      28
      24
      1
      0
      1
      0
      1
      0
      1
    
    
      875
      1
      15
      7
      1
      0
      0
      1
      0
      0
      0
    
    
      876
      0
      20
      9
      0
      0
      0
      0
      0
      0
      0
    
    
      877
      0
      19
      7
      0
      0
      0
      0
      0
      0
      0
    
    
      878
      0
      30
      7
      0
      0
      0
      0
      0
      0
      0
    
    
      879
      1
      56
      83
      1
      0
      1
      0
      1
      1
      0
    
    
      880
      1
      25
      26
      0
      0
      1
      0
      1
      0
      1
    
    
      881
      0
      33
      7
      0
      0
      0
      0
      0
      0
      0
    
    
      882
      0
      22
      10
      0
      0
      0
      0
      1
      0
      0
    
    
      883
      0
      28
      10
      0
      0
      0
      0
      0
      0
      1
    
    
      884
      0
      25
      7
      0
      0
      0
      0
      0
      0
      0
    
    
      885
      0
      39
      29
      0
      1
      1
      0
      1
      0
      0
    
    
      886
      0
      27
      13
      0
      0
      0
      0
      0
      0
      1
    
    
      887
      1
      19
      30
      0
      0
      0
      0
      1
      1
      0
    
    
      888
      0
      30
      23
      0
      0
      1
      0
      1
      0
      0
    
    
      889
      1
      26
      30
      1
      0
      0
      0
      0
      1
      0
    
    
      890
      0
      32
      7
      0
      1
      0
      0
      0
      0
      0
    
  

891 rows × 10 columns

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

	Features	Coefficient Estimate
0	Age	-0.022065
1	Fare	0.001030
2	C	0.596758
3	Q	0.317246
4	Family	-0.207585
5	Child	1.752809
6	Female	2.743579
7	Class_1	1.957364
8	Class_2	1.116120

	Survived	Age	Fare	C	Q	Family	Child	Female	Class_1	Class_2
0	0	22	7	0	0	1	0	0	0	0
1	1	38	71	1	0	1	0	1	1	0
2	1	26	7	0	0	0	0	1	0	0
3	1	35	53	0	0	1	0	1	1	0
4	0	35	8	0	0	0	0	0	0	0
5	0	42	8	0	1	0	0	0	0	0
6	0	54	51	0	0	0	0	0	1	0
7	0	2	21	0	0	1	1	0	0	0
8	1	27	11	0	0	1	0	1	0	0
9	1	14	30	1	0	1	1	0	0	1
10	1	4	16	0	0	1	1	0	0	0
11	1	58	26	0	0	0	0	1	1	0
12	0	20	8	0	0	0	0	0	0	0
13	0	39	31	0	0	1	0	0	0	0
14	0	14	7	0	0	0	1	0	0	0
15	1	55	16	0	0	0	0	1	0	1
16	0	2	29	0	1	1	1	0	0	0
17	1	26	13	0	0	0	0	0	0	1
18	0	31	18	0	0	1	0	1	0	0
19	1	34	7	1	0	0	0	1	0	0
20	0	35	26	0	0	0	0	0	0	1
21	1	34	13	0	0	0	0	0	0	1
22	1	15	8	0	1	0	1	0	0	0
23	1	28	35	0	0	0	0	0	1	0
24	0	8	21	0	0	1	1	0	0	0
25	1	38	31	0	0	1	0	1	0	0
26	0	39	7	1	0	0	0	0	0	0
27	0	19	263	0	0	1	0	0	1	0
28	1	42	7	0	1	0	0	1	0	0
29	0	21	7	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...
861	0	21	11	0	0	1	0	0	0	1
862	1	48	25	0	0	0	0	1	1	0
863	0	29	69	0	0	1	0	1	0	0
864	0	24	13	0	0	0	0	0	0	1
865	1	42	13	0	0	0	0	1	0	1
866	1	27	13	1	0	1	0	1	0	1
867	0	31	50	0	0	0	0	0	1	0
868	0	34	9	0	0	0	0	0	0	0
869	1	4	11	0	0	1	1	0	0	0
870	0	26	7	0	0	0	0	0	0	0
871	1	47	52	0	0	1	0	1	1	0
872	0	33	5	0	0	0	0	0	1	0
873	0	47	9	0	0	0	0	0	0	0
874	1	28	24	1	0	1	0	1	0	1
875	1	15	7	1	0	0	1	0	0	0
876	0	20	9	0	0	0	0	0	0	0
877	0	19	7	0	0	0	0	0	0	0
878	0	30	7	0	0	0	0	0	0	0
879	1	56	83	1	0	1	0	1	1	0
880	1	25	26	0	0	1	0	1	0	1
881	0	33	7	0	0	0	0	0	0	0
882	0	22	10	0	0	0	0	1	0	0
883	0	28	10	0	0	0	0	0	0	1
884	0	25	7	0	0	0	0	0	0	0
885	0	39	29	0	1	1	0	1	0	0
886	0	27	13	0	0	0	0	0	0	1
887	1	19	30	0	0	0	0	1	1	0
888	0	30	23	0	0	1	0	1	0	0
889	1	26	30	1	0	0	0	0	1	0
890	0	32	7	0	1	0	0	0	0	0

	Survived	Age	Fare	C	Q	Family	Child	Female	Class_1	Class_2
0	0	22	7	0	0	1	0	0	0	0
1	1	38	71	1	0	1	0	1	1	0
2	1	26	7	0	0	0	0	1	0	0
3	1	35	53	0	0	1	0	1	1	0
4	0	35	8	0	0	0	0	0	0	0
5	0	42	8	0	1	0	0	0	0	0
6	0	54	51	0	0	0	0	0	1	0
7	0	2	21	0	0	1	1	0	0	0
8	1	27	11	0	0	1	0	1	0	0
9	1	14	30	1	0	1	1	0	0	1
10	1	4	16	0	0	1	1	0	0	0
11	1	58	26	0	0	0	0	1	1	0
12	0	20	8	0	0	0	0	0	0	0
13	0	39	31	0	0	1	0	0	0	0
14	0	14	7	0	0	0	1	0	0	0
15	1	55	16	0	0	0	0	1	0	1
16	0	2	29	0	1	1	1	0	0	0
17	1	26	13	0	0	0	0	0	0	1
18	0	31	18	0	0	1	0	1	0	0
19	1	34	7	1	0	0	0	1	0	0
20	0	35	26	0	0	0	0	0	0	1
21	1	34	13	0	0	0	0	0	0	1
22	1	15	8	0	1	0	1	0	0	0
23	1	28	35	0	0	0	0	0	1	0
24	0	8	21	0	0	1	1	0	0	0
25	1	38	31	0	0	1	0	1	0	0
26	0	39	7	1	0	0	0	0	0	0
27	0	19	263	0	0	1	0	0	1	0
28	1	42	7	0	1	0	0	1	0	0
29	0	21	7	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...
861	0	21	11	0	0	1	0	0	0	1
862	1	48	25	0	0	0	0	1	1	0
863	0	29	69	0	0	1	0	1	0	0
864	0	24	13	0	0	0	0	0	0	1
865	1	42	13	0	0	0	0	1	0	1
866	1	27	13	1	0	1	0	1	0	1
867	0	31	50	0	0	0	0	0	1	0
868	0	34	9	0	0	0	0	0	0	0
869	1	4	11	0	0	1	1	0	0	0
870	0	26	7	0	0	0	0	0	0	0
871	1	47	52	0	0	1	0	1	1	0
872	0	33	5	0	0	0	0	0	1	0
873	0	47	9	0	0	0	0	0	0	0
874	1	28	24	1	0	1	0	1	0	1
875	1	15	7	1	0	0	1	0	0	0
876	0	20	9	0	0	0	0	0	0	0
877	0	19	7	0	0	0	0	0	0	0
878	0	30	7	0	0	0	0	0	0	0
879	1	56	83	1	0	1	0	1	1	0
880	1	25	26	0	0	1	0	1	0	1
881	0	33	7	0	0	0	0	0	0	0
882	0	22	10	0	0	0	0	1	0	0
883	0	28	10	0	0	0	0	0	0	1
884	0	25	7	0	0	0	0	0	0	0
885	0	39	29	0	1	1	0	1	0	0
886	0	27	13	0	0	0	0	0	0	1
887	1	19	30	0	0	0	0	1	1	0
888	0	30	23	0	0	1	0	1	0	0
889	1	26	30	1	0	0	0	0	1	0
890	0	32	7	0	1	0	0	0	0	0

	Survived	Age	Fare	C	Q	Family	Child	Female	Class_1	Class_2
0	0	22	7	0	0	1	0	0	0	0
1	1	38	71	1	0	1	0	1	1	0
2	1	26	7	0	0	0	0	1	0	0
3	1	35	53	0	0	1	0	1	1	0
4	0	35	8	0	0	0	0	0	0	0
5	0	42	8	0	1	0	0	0	0	0
6	0	54	51	0	0	0	0	0	1	0
7	0	2	21	0	0	1	1	0	0	0
8	1	27	11	0	0	1	0	1	0	0
9	1	14	30	1	0	1	1	0	0	1
10	1	4	16	0	0	1	1	0	0	0
11	1	58	26	0	0	0	0	1	1	0
12	0	20	8	0	0	0	0	0	0	0
13	0	39	31	0	0	1	0	0	0	0
14	0	14	7	0	0	0	1	0	0	0
15	1	55	16	0	0	0	0	1	0	1
16	0	2	29	0	1	1	1	0	0	0
17	1	26	13	0	0	0	0	0	0	1
18	0	31	18	0	0	1	0	1	0	0
19	1	34	7	1	0	0	0	1	0	0
20	0	35	26	0	0	0	0	0	0	1
21	1	34	13	0	0	0	0	0	0	1
22	1	15	8	0	1	0	1	0	0	0
23	1	28	35	0	0	0	0	0	1	0
24	0	8	21	0	0	1	1	0	0	0
25	1	38	31	0	0	1	0	1	0	0
26	0	39	7	1	0	0	0	0	0	0
27	0	19	263	0	0	1	0	0	1	0
28	1	42	7	0	1	0	0	1	0	0
29	0	21	7	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...
861	0	21	11	0	0	1	0	0	0	1
862	1	48	25	0	0	0	0	1	1	0
863	0	29	69	0	0	1	0	1	0	0
864	0	24	13	0	0	0	0	0	0	1
865	1	42	13	0	0	0	0	1	0	1
866	1	27	13	1	0	1	0	1	0	1
867	0	31	50	0	0	0	0	0	1	0
868	0	34	9	0	0	0	0	0	0	0
869	1	4	11	0	0	1	1	0	0	0
870	0	26	7	0	0	0	0	0	0	0
871	1	47	52	0	0	1	0	1	1	0
872	0	33	5	0	0	0	0	0	1	0
873	0	47	9	0	0	0	0	0	0	0
874	1	28	24	1	0	1	0	1	0	1
875	1	15	7	1	0	0	1	0	0	0
876	0	20	9	0	0	0	0	0	0	0
877	0	19	7	0	0	0	0	0	0	0
878	0	30	7	0	0	0	0	0	0	0
879	1	56	83	1	0	1	0	1	1	0
880	1	25	26	0	0	1	0	1	0	1
881	0	33	7	0	0	0	0	0	0	0
882	0	22	10	0	0	0	0	1	0	0
883	0	28	10	0	0	0	0	0	0	1
884	0	25	7	0	0	0	0	0	0	0
885	0	39	29	0	1	1	0	1	0	0
886	0	27	13	0	0	0	0	0	0	1
887	1	19	30	0	0	0	0	1	1	0
888	0	30	23	0	0	1	0	1	0	0
889	1	26	30	1	0	0	0	0	1	0
890	0	32	7	0	1	0	0	0	0	0