notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np

df = pd.read_csv('Desktop/titanic/train.csv')



In [2]:

    
df.head(10)









    Out[2]:






  
    
      
      PassengerId
      Survived
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
    
  
  
    
      0
      1
      0
      3
      Braund, Mr. Owen Harris
      male
      22.0
      1
      0
      A/5 21171
      7.2500
      NaN
      S
    
    
      1
      2
      1
      1
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      female
      38.0
      1
      0
      PC 17599
      71.2833
      C85
      C
    
    
      2
      3
      1
      3
      Heikkinen, Miss. Laina
      female
      26.0
      0
      0
      STON/O2. 3101282
      7.9250
      NaN
      S
    
    
      3
      4
      1
      1
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      female
      35.0
      1
      0
      113803
      53.1000
      C123
      S
    
    
      4
      5
      0
      3
      Allen, Mr. William Henry
      male
      35.0
      0
      0
      373450
      8.0500
      NaN
      S
    
    
      5
      6
      0
      3
      Moran, Mr. James
      male
      NaN
      0
      0
      330877
      8.4583
      NaN
      Q
    
    
      6
      7
      0
      1
      McCarthy, Mr. Timothy J
      male
      54.0
      0
      0
      17463
      51.8625
      E46
      S
    
    
      7
      8
      0
      3
      Palsson, Master. Gosta Leonard
      male
      2.0
      3
      1
      349909
      21.0750
      NaN
      S
    
    
      8
      9
      1
      3
      Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
      female
      27.0
      0
      2
      347742
      11.1333
      NaN
      S
    
    
      9
      10
      1
      2
      Nasser, Mrs. Nicholas (Adele Achem)
      female
      14.0
      1
      0
      237736
      30.0708
      NaN
      C



In [3]:

    
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)



In [4]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 9 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Fare           891 non-null float64
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(2)
memory usage: 62.7+ KB



In [5]:

    
df = df.dropna()



In [6]:

    
df['Sex'].unique()









    Out[6]:





array(['male', 'female'], dtype=object)



In [7]:

    
df['Gender'] = df['Sex'].map({'female': 0, 'male':1}).astype(int)



In [8]:

    
df['Embarked'].unique()









    Out[8]:





array(['S', 'C', 'Q'], dtype=object)



In [9]:

    
df['Port'] = df['Embarked'].map({'C':1, 'S':2, 'Q':3}).astype(int)



In [10]:

    
df = df.drop(['Sex', 'Embarked'], axis=1)



In [11]:

    
cols = df.columns.tolist()
print(cols)









    



['PassengerId', 'Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Gender', 'Port']



In [12]:

    
cols = [cols[1]] + cols[0:1] + cols[2:]
df = df[cols]



In [13]:

    
df.head(10)









    Out[13]:






  
    
      
      Survived
      PassengerId
      Pclass
      Age
      SibSp
      Parch
      Fare
      Gender
      Port
    
  
  
    
      0
      0
      1
      3
      22.0
      1
      0
      7.2500
      1
      2
    
    
      1
      1
      2
      1
      38.0
      1
      0
      71.2833
      0
      1
    
    
      2
      1
      3
      3
      26.0
      0
      0
      7.9250
      0
      2
    
    
      3
      1
      4
      1
      35.0
      1
      0
      53.1000
      0
      2
    
    
      4
      0
      5
      3
      35.0
      0
      0
      8.0500
      1
      2
    
    
      6
      0
      7
      1
      54.0
      0
      0
      51.8625
      1
      2
    
    
      7
      0
      8
      3
      2.0
      3
      1
      21.0750
      1
      2
    
    
      8
      1
      9
      3
      27.0
      0
      2
      11.1333
      0
      2
    
    
      9
      1
      10
      2
      14.0
      1
      0
      30.0708
      0
      1
    
    
      10
      1
      11
      3
      4.0
      1
      1
      16.7000
      0
      2



In [14]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 712 entries, 0 to 890
Data columns (total 9 columns):
Survived       712 non-null int64
PassengerId    712 non-null int64
Pclass         712 non-null int64
Age            712 non-null float64
SibSp          712 non-null int64
Parch          712 non-null int64
Fare           712 non-null float64
Gender         712 non-null int64
Port           712 non-null int64
dtypes: float64(2), int64(7)
memory usage: 55.6 KB



In [15]:

    
train_data = df.values



In [27]:

    
#Using Random Forest to predict data
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators = 100)
print np.shape(train_data)



In [17]:

    
model = model.fit(train_data[0:,2:],train_data[0:,0])



In [28]:

    
df_test = pd.read_csv('Desktop/titanic/test.csv')



In [19]:

    
df_test.head(10)









    Out[19]:






  
    
      
      PassengerId
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
    
  
  
    
      0
      892
      3
      Kelly, Mr. James
      male
      34.5
      0
      0
      330911
      7.8292
      NaN
      Q
    
    
      1
      893
      3
      Wilkes, Mrs. James (Ellen Needs)
      female
      47.0
      1
      0
      363272
      7.0000
      NaN
      S
    
    
      2
      894
      2
      Myles, Mr. Thomas Francis
      male
      62.0
      0
      0
      240276
      9.6875
      NaN
      Q
    
    
      3
      895
      3
      Wirz, Mr. Albert
      male
      27.0
      0
      0
      315154
      8.6625
      NaN
      S
    
    
      4
      896
      3
      Hirvonen, Mrs. Alexander (Helga E Lindqvist)
      female
      22.0
      1
      1
      3101298
      12.2875
      NaN
      S
    
    
      5
      897
      3
      Svensson, Mr. Johan Cervin
      male
      14.0
      0
      0
      7538
      9.2250
      NaN
      S
    
    
      6
      898
      3
      Connolly, Miss. Kate
      female
      30.0
      0
      0
      330972
      7.6292
      NaN
      Q
    
    
      7
      899
      2
      Caldwell, Mr. Albert Francis
      male
      26.0
      1
      1
      248738
      29.0000
      NaN
      S
    
    
      8
      900
      3
      Abrahim, Mrs. Joseph (Sophie Halaut Easu)
      female
      18.0
      0
      0
      2657
      7.2292
      NaN
      C
    
    
      9
      901
      3
      Davies, Mr. John Samuel
      male
      21.0
      2
      0
      A/4 48871
      24.1500
      NaN
      S



In [20]:

    
df_test = df_test.drop(['Name', 'Ticket', 'Cabin'], axis=1)

df_test = df_test.dropna()

df_test['Gender'] = df_test['Sex'].map({'female': 0, 'male':1})
df_test['Port'] = df_test['Embarked'].map({'C':1, 'S':2, 'Q':3})

df_test = df_test.drop(['Sex', 'Embarked'], axis=1)

test_data = df_test.values



In [21]:

    
df_test.head(10)









    Out[21]:






  
    
      
      PassengerId
      Pclass
      Age
      SibSp
      Parch
      Fare
      Gender
      Port
    
  
  
    
      0
      892
      3
      34.5
      0
      0
      7.8292
      1
      3
    
    
      1
      893
      3
      47.0
      1
      0
      7.0000
      0
      2
    
    
      2
      894
      2
      62.0
      0
      0
      9.6875
      1
      3
    
    
      3
      895
      3
      27.0
      0
      0
      8.6625
      1
      2
    
    
      4
      896
      3
      22.0
      1
      1
      12.2875
      0
      2
    
    
      5
      897
      3
      14.0
      0
      0
      9.2250
      1
      2
    
    
      6
      898
      3
      30.0
      0
      0
      7.6292
      0
      3
    
    
      7
      899
      2
      26.0
      1
      1
      29.0000
      1
      2
    
    
      8
      900
      3
      18.0
      0
      0
      7.2292
      0
      1
    
    
      9
      901
      3
      21.0
      2
      0
      24.1500
      1
      2



In [22]:

    
output = model.predict(test_data[:,1:])



In [23]:

    
result = np.c_[test_data[:,0].astype(int), output.astype(int)]
df_result = pd.DataFrame(result[:,0:2], columns=['PassengerId', 'Survived'])



In [24]:

    
df_result.head(10)









    Out[24]:






  
    
      
      PassengerId
      Survived
    
  
  
    
      0
      892
      0
    
    
      1
      893
      0
    
    
      2
      894
      0
    
    
      3
      895
      1
    
    
      4
      896
      0
    
    
      5
      897
      0
    
    
      6
      898
      0
    
    
      7
      899
      0
    
    
      8
      900
      1
    
    
      9
      901
      0



In [25]:

    
df_result.to_csv('Desktop/titanic/RandomForestresults.csv', index=False)



In [26]:

    
df_result.shape









    Out[26]:





(331, 2)



In [ ]:

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	0	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	0	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	0	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	0	373450	8.0500	NaN	S
5	6	0	3	Moran, Mr. James	male	NaN	0	0	330877	8.4583	NaN	Q
6	7	0	1	McCarthy, Mr. Timothy J	male	54.0	0	0	17463	51.8625	E46	S
7	8	0	3	Palsson, Master. Gosta Leonard	male	2.0	3	1	349909	21.0750	NaN	S
8	9	1	3	Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)	female	27.0	0	2	347742	11.1333	NaN	S
9	10	1	2	Nasser, Mrs. Nicholas (Adele Achem)	female	14.0	1	0	237736	30.0708	NaN	C

	PassengerId	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
0	892	3	Kelly, Mr. James	male	34.5	0	0	330911	7.8292	NaN	Q
1	893	3	Wilkes, Mrs. James (Ellen Needs)	female	47.0	1	0	363272	7.0000	NaN	S
2	894	2	Myles, Mr. Thomas Francis	male	62.0	0	0	240276	9.6875	NaN	Q
3	895	3	Wirz, Mr. Albert	male	27.0	0	0	315154	8.6625	NaN	S
4	896	3	Hirvonen, Mrs. Alexander (Helga E Lindqvist)	female	22.0	1	1	3101298	12.2875	NaN	S
5	897	3	Svensson, Mr. Johan Cervin	male	14.0	0	0	7538	9.2250	NaN	S
6	898	3	Connolly, Miss. Kate	female	30.0	0	0	330972	7.6292	NaN	Q
7	899	2	Caldwell, Mr. Albert Francis	male	26.0	1	1	248738	29.0000	NaN	S
8	900	3	Abrahim, Mrs. Joseph (Sophie Halaut Easu)	female	18.0	0	0	2657	7.2292	NaN	C
9	901	3	Davies, Mr. John Samuel	male	21.0	2	0	A/4 48871	24.1500	NaN	S