In [2]:
import numpy as np
import pandas as pd
from sklearn import preprocessing

In [3]:
df = pd.read_csv("train.csv")
df.loc[df["Sex"] == 'female',"Sex"] = 0
df.loc[df["Sex"] == 'male',"Sex"] = 1
print(len(df))
df = df.fillna(value="Not available")
df = df.drop("Cabin")
df = df.drop("")
df_train = df[0:600]
df_cross_validate = df[601:]
df_train
df_test = pd.read_csv("test.csv")


891
Out[3]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris 1 22 1 0 A/5 21171 7.2500 Not available S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... 0 38 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina 0 26 0 0 STON/O2. 3101282 7.9250 Not available S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) 0 35 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry 1 35 0 0 373450 8.0500 Not available S
5 6 0 3 Moran, Mr. James 1 Not available 0 0 330877 8.4583 Not available Q
6 7 0 1 McCarthy, Mr. Timothy J 1 54 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard 1 2 3 1 349909 21.0750 Not available S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) 0 27 0 2 347742 11.1333 Not available S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) 0 14 1 0 237736 30.0708 Not available C
10 11 1 3 Sandstrom, Miss. Marguerite Rut 0 4 1 1 PP 9549 16.7000 G6 S
11 12 1 1 Bonnell, Miss. Elizabeth 0 58 0 0 113783 26.5500 C103 S
12 13 0 3 Saundercock, Mr. William Henry 1 20 0 0 A/5. 2151 8.0500 Not available S
13 14 0 3 Andersson, Mr. Anders Johan 1 39 1 5 347082 31.2750 Not available S
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina 0 14 0 0 350406 7.8542 Not available S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) 0 55 0 0 248706 16.0000 Not available S
16 17 0 3 Rice, Master. Eugene 1 2 4 1 382652 29.1250 Not available Q
17 18 1 2 Williams, Mr. Charles Eugene 1 Not available 0 0 244373 13.0000 Not available S
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... 0 31 1 0 345763 18.0000 Not available S
19 20 1 3 Masselmani, Mrs. Fatima 0 Not available 0 0 2649 7.2250 Not available C
20 21 0 2 Fynney, Mr. Joseph J 1 35 0 0 239865 26.0000 Not available S
21 22 1 2 Beesley, Mr. Lawrence 1 34 0 0 248698 13.0000 D56 S
22 23 1 3 McGowan, Miss. Anna "Annie" 0 15 0 0 330923 8.0292 Not available Q
23 24 1 1 Sloper, Mr. William Thompson 1 28 0 0 113788 35.5000 A6 S
24 25 0 3 Palsson, Miss. Torborg Danira 0 8 3 1 349909 21.0750 Not available S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... 0 38 1 5 347077 31.3875 Not available S
26 27 0 3 Emir, Mr. Farred Chehab 1 Not available 0 0 2631 7.2250 Not available C
27 28 0 1 Fortune, Mr. Charles Alexander 1 19 3 2 19950 263.0000 C23 C25 C27 S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" 0 Not available 0 0 330959 7.8792 Not available Q
29 30 0 3 Todoroff, Mr. Lalio 1 Not available 0 0 349216 7.8958 Not available S
... ... ... ... ... ... ... ... ... ... ... ... ...
570 571 1 2 Harris, Mr. George 1 62 0 0 S.W./PP 752 10.5000 Not available S
571 572 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) 0 53 2 0 11769 51.4792 C101 S
572 573 1 1 Flynn, Mr. John Irwin ("Irving") 1 36 0 0 PC 17474 26.3875 E25 S
573 574 1 3 Kelly, Miss. Mary 0 Not available 0 0 14312 7.7500 Not available Q
574 575 0 3 Rush, Mr. Alfred George John 1 16 0 0 A/4. 20589 8.0500 Not available S
575 576 0 3 Patchett, Mr. George 1 19 0 0 358585 14.5000 Not available S
576 577 1 2 Garside, Miss. Ethel 0 34 0 0 243880 13.0000 Not available S
577 578 1 1 Silvey, Mrs. William Baird (Alice Munger) 0 39 1 0 13507 55.9000 E44 S
578 579 0 3 Caram, Mrs. Joseph (Maria Elias) 0 Not available 1 0 2689 14.4583 Not available C
579 580 1 3 Jussila, Mr. Eiriik 1 32 0 0 STON/O 2. 3101286 7.9250 Not available S
580 581 1 2 Christy, Miss. Julie Rachel 0 25 1 1 237789 30.0000 Not available S
581 582 1 1 Thayer, Mrs. John Borland (Marian Longstreth M... 0 39 1 1 17421 110.8833 C68 C
582 583 0 2 Downton, Mr. William James 1 54 0 0 28403 26.0000 Not available S
583 584 0 1 Ross, Mr. John Hugo 1 36 0 0 13049 40.1250 A10 C
584 585 0 3 Paulner, Mr. Uscher 1 Not available 0 0 3411 8.7125 Not available C
585 586 1 1 Taussig, Miss. Ruth 0 18 0 2 110413 79.6500 E68 S
586 587 0 2 Jarvis, Mr. John Denzil 1 47 0 0 237565 15.0000 Not available S
587 588 1 1 Frolicher-Stehli, Mr. Maxmillian 1 60 1 1 13567 79.2000 B41 C
588 589 0 3 Gilinski, Mr. Eliezer 1 22 0 0 14973 8.0500 Not available S
589 590 0 3 Murdlin, Mr. Joseph 1 Not available 0 0 A./5. 3235 8.0500 Not available S
590 591 0 3 Rintamaki, Mr. Matti 1 35 0 0 STON/O 2. 3101273 7.1250 Not available S
591 592 1 1 Stephenson, Mrs. Walter Bertram (Martha Eustis) 0 52 1 0 36947 78.2667 D20 C
592 593 0 3 Elsbury, Mr. William James 1 47 0 0 A/5 3902 7.2500 Not available S
593 594 0 3 Bourke, Miss. Mary 0 Not available 0 2 364848 7.7500 Not available Q
594 595 0 2 Chapman, Mr. John Henry 1 37 1 0 SC/AH 29037 26.0000 Not available S
595 596 0 3 Van Impe, Mr. Jean Baptiste 1 36 1 1 345773 24.1500 Not available S
596 597 1 2 Leitch, Miss. Jessie Wills 0 Not available 0 0 248727 33.0000 Not available S
597 598 0 3 Johnson, Mr. Alfred 1 49 0 0 LINE 0.0000 Not available S
598 599 0 3 Boulos, Mr. Hanna 1 Not available 0 0 2664 7.2250 Not available C
599 600 1 1 Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan") 1 49 1 0 PC 17485 56.9292 A20 C

600 rows × 12 columns


In [4]:
Features = ["PassengerId","Survived","Pclass","Sex","Age","Fare","Embarked"]
Passanger_data = df["PassengerId"]
Survived_data = df["Survived"]
Pclass_data = df["Pclass"]
Sex_data = df["Sex"]
Age_data = df["Age"]
Fare_data = df["Fare"]
Embarked_data = df["Embarked"]

In [1]:
feautre_list = ["PassengerId","Survived","Sex","Age","Fare"]