In [27]:
%matplotlib inline
import pandas as pd
import numpy as np
import pylab as plt
import seaborn

# Set the global default size of matplotlib figures
plt.rc('figure', figsize=(10, 5))

# Set seaborn aesthetic parameters to defaults
seaborn.set()

In [93]:
df_train = pd.read_csv('titanic/train(1).csv')
df_train


Out[93]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S
... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 2 Giles, Mr. Frederick Edward male 21.0 1 0 28134 11.5000 NaN S
862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 0 17466 25.9292 D17 S
863 864 0 3 Sage, Miss. Dorothy Edith "Dolly" female NaN 8 2 CA. 2343 69.5500 NaN S
864 865 0 2 Gill, Mr. John William male 24.0 0 0 233866 13.0000 NaN S
865 866 1 2 Bystrom, Mrs. (Karolina) female 42.0 0 0 236852 13.0000 NaN S
866 867 1 2 Duran y More, Miss. Asuncion female 27.0 1 0 SC/PARIS 2149 13.8583 NaN C
867 868 0 1 Roebling, Mr. Washington Augustus II male 31.0 0 0 PC 17590 50.4958 A24 S
868 869 0 3 van Melkebeke, Mr. Philemon male NaN 0 0 345777 9.5000 NaN S
869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 NaN S
870 871 0 3 Balkic, Mr. Cerin male 26.0 0 0 349248 7.8958 NaN S
871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 D35 S
872 873 0 1 Carlsson, Mr. Frans Olof male 33.0 0 0 695 5.0000 B51 B53 B55 S
873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 NaN S
874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 NaN C
875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.0 0 0 2667 7.2250 NaN C
876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.0 0 0 7534 9.8458 NaN S
877 878 0 3 Petroff, Mr. Nedelio male 19.0 0 0 349212 7.8958 NaN S
878 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 NaN S
879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 C50 C
880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 1 230433 26.0000 NaN S
881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 NaN S
882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.0 0 0 7552 10.5167 NaN S
883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN S
884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S
885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns


In [154]:
def clean_data(df):
    
    # Get the unique values of Sex
    sexes = np.sort(df['Sex'].unique())
    

     # Generate a mapping of Sex from a string to a number representation    
    genders_mapping = dict(zip(sexes, range(0, len(sexes) + 1)))
    
     
    # Transform Sex from a string to a number representation
    df['Sex_Val'] = df['Sex'].map(genders_mapping).astype(int)
   
    
     # Get the unique values of Embarked
    embarked_locs = np.sort(df['Embarked'].unique())
   
    # Generate a mapping of Embarked from a string to a number representation        
    embarked_locs_mapping = dict(zip(embarked_locs, range(0, len(embarked_locs) + 1)))
   
      
    # Transform Embarked from a string to dummy variables
    df = pd.concat([df, pd.get_dummies(df['Embarked'], prefix='Embarked_Val')], axis=1)
    
    
    ## Fill in missing values of Embarked
    # Since the vast majority of passengers embarked in 'S': 3, 
    # we assign the missing values in Embarked to 'S':
    if len(df[df['Embarked'].isnull()] > 0):
        df.replace({'Embarked_Val' : 
                       { embarked_locs_mapping[np.nan] : embarked_locs_mapping['S'] 
                       }
                   }, 
                   inplace=True)
        
    # Fill in missing values of Fare with the average Fare
    if len(df[df['Fare'].isnull()] > 0):
        avg_fare = df['Fare'].mean()
        df.replace({ None: avg_fare }, inplace=True)
        
        
    # To keep Age in tact, make a copy of it called AgeFill 
    # that we will use to fill in the missing ages:
    df['AgeFill'] = df['Age']    
    
    # Determine the Age typical for each passenger class by Sex_Val.  
    # We'll use the median instead of the mean because the Age 
    # histogram seems to be right skewed.
    df['AgeFill'] = df['AgeFill'].groupby([df['Sex_Val'], df['Pclass']]).apply(lambda x: x.fillna(x.median()))
    
    # Define a new feature FamilySize that is the sum of 
    # Parch (number of parents or children on board) and 
    # SibSp (number of siblings or spouses):
    df['FamilySize'] = df['SibSp'] + df['Parch']
    
    return df
df_train = clean_data(df_train)
df_train


Out[154]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare ... Embarked Sex_Val Embarked_Val_C Embarked_Val_Q Embarked_Val_S AgeFill FamilySize Embarked_Val_C Embarked_Val_Q Embarked_Val_S
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 ... S 1 0.0 0.0 1.0 22.0 1 0.0 0.0 1.0
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 ... C 0 1.0 0.0 0.0 38.0 1 1.0 0.0 0.0
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 ... S 0 0.0 0.0 1.0 26.0 0 0.0 0.0 1.0
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 ... S 0 0.0 0.0 1.0 35.0 1 0.0 0.0 1.0
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 ... S 1 0.0 0.0 1.0 35.0 0 0.0 0.0 1.0
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 ... Q 1 0.0 1.0 0.0 25.0 0 0.0 1.0 0.0
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 ... S 1 0.0 0.0 1.0 54.0 0 0.0 0.0 1.0
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 ... S 1 0.0 0.0 1.0 2.0 4 0.0 0.0 1.0
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 ... S 0 0.0 0.0 1.0 27.0 2 0.0 0.0 1.0
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 ... C 0 1.0 0.0 0.0 14.0 1 1.0 0.0 0.0
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 ... S 0 0.0 0.0 1.0 4.0 2 0.0 0.0 1.0
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 ... S 0 0.0 0.0 1.0 58.0 0 0.0 0.0 1.0
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 ... S 1 0.0 0.0 1.0 20.0 0 0.0 0.0 1.0
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 ... S 1 0.0 0.0 1.0 39.0 6 0.0 0.0 1.0
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 ... S 0 0.0 0.0 1.0 14.0 0 0.0 0.0 1.0
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 ... S 0 0.0 0.0 1.0 55.0 0 0.0 0.0 1.0
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 ... Q 1 0.0 1.0 0.0 2.0 5 0.0 1.0 0.0
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 ... S 1 0.0 0.0 1.0 30.0 0 0.0 0.0 1.0
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 ... S 0 0.0 0.0 1.0 31.0 1 0.0 0.0 1.0
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 ... C 0 1.0 0.0 0.0 21.5 0 1.0 0.0 0.0
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 ... S 1 0.0 0.0 1.0 35.0 0 0.0 0.0 1.0
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 ... S 1 0.0 0.0 1.0 34.0 0 0.0 0.0 1.0
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 ... Q 0 0.0 1.0 0.0 15.0 0 0.0 1.0 0.0
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 ... S 1 0.0 0.0 1.0 28.0 0 0.0 0.0 1.0
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 ... S 0 0.0 0.0 1.0 8.0 4 0.0 0.0 1.0
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 ... S 0 0.0 0.0 1.0 38.0 6 0.0 0.0 1.0
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 ... C 1 1.0 0.0 0.0 25.0 0 1.0 0.0 0.0
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 ... S 1 0.0 0.0 1.0 19.0 5 0.0 0.0 1.0
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 ... Q 0 0.0 1.0 0.0 21.5 0 0.0 1.0 0.0
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 ... S 1 0.0 0.0 1.0 25.0 0 0.0 0.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 2 Giles, Mr. Frederick Edward male 21.0 1 0 28134 11.5000 ... S 1 0.0 0.0 1.0 21.0 1 0.0 0.0 1.0
862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 0 17466 25.9292 ... S 0 0.0 0.0 1.0 48.0 0 0.0 0.0 1.0
863 864 0 3 Sage, Miss. Dorothy Edith "Dolly" female NaN 8 2 CA. 2343 69.5500 ... S 0 0.0 0.0 1.0 21.5 10 0.0 0.0 1.0
864 865 0 2 Gill, Mr. John William male 24.0 0 0 233866 13.0000 ... S 1 0.0 0.0 1.0 24.0 0 0.0 0.0 1.0
865 866 1 2 Bystrom, Mrs. (Karolina) female 42.0 0 0 236852 13.0000 ... S 0 0.0 0.0 1.0 42.0 0 0.0 0.0 1.0
866 867 1 2 Duran y More, Miss. Asuncion female 27.0 1 0 SC/PARIS 2149 13.8583 ... C 0 1.0 0.0 0.0 27.0 1 1.0 0.0 0.0
867 868 0 1 Roebling, Mr. Washington Augustus II male 31.0 0 0 PC 17590 50.4958 ... S 1 0.0 0.0 1.0 31.0 0 0.0 0.0 1.0
868 869 0 3 van Melkebeke, Mr. Philemon male NaN 0 0 345777 9.5000 ... S 1 0.0 0.0 1.0 25.0 0 0.0 0.0 1.0
869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 ... S 1 0.0 0.0 1.0 4.0 2 0.0 0.0 1.0
870 871 0 3 Balkic, Mr. Cerin male 26.0 0 0 349248 7.8958 ... S 1 0.0 0.0 1.0 26.0 0 0.0 0.0 1.0
871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 ... S 0 0.0 0.0 1.0 47.0 2 0.0 0.0 1.0
872 873 0 1 Carlsson, Mr. Frans Olof male 33.0 0 0 695 5.0000 ... S 1 0.0 0.0 1.0 33.0 0 0.0 0.0 1.0
873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 ... S 1 0.0 0.0 1.0 47.0 0 0.0 0.0 1.0
874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 ... C 0 1.0 0.0 0.0 28.0 1 1.0 0.0 0.0
875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.0 0 0 2667 7.2250 ... C 0 1.0 0.0 0.0 15.0 0 1.0 0.0 0.0
876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.0 0 0 7534 9.8458 ... S 1 0.0 0.0 1.0 20.0 0 0.0 0.0 1.0
877 878 0 3 Petroff, Mr. Nedelio male 19.0 0 0 349212 7.8958 ... S 1 0.0 0.0 1.0 19.0 0 0.0 0.0 1.0
878 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 ... S 1 0.0 0.0 1.0 25.0 0 0.0 0.0 1.0
879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 ... C 0 1.0 0.0 0.0 56.0 1 1.0 0.0 0.0
880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 1 230433 26.0000 ... S 0 0.0 0.0 1.0 25.0 1 0.0 0.0 1.0
881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 ... S 1 0.0 0.0 1.0 33.0 0 0.0 0.0 1.0
882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.0 0 0 7552 10.5167 ... S 0 0.0 0.0 1.0 22.0 0 0.0 0.0 1.0
883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 ... S 1 0.0 0.0 1.0 28.0 0 0.0 0.0 1.0
884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 ... S 1 0.0 0.0 1.0 25.0 0 0.0 0.0 1.0
885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 ... Q 0 0.0 1.0 0.0 39.0 5 0.0 1.0 0.0
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 ... S 1 0.0 0.0 1.0 27.0 0 0.0 0.0 1.0
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 ... S 0 0.0 0.0 1.0 19.0 0 0.0 0.0 1.0
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 ... S 0 0.0 0.0 1.0 21.5 3 0.0 0.0 1.0
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 ... C 1 1.0 0.0 0.0 26.0 0 1.0 0.0 0.0
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 ... Q 1 0.0 1.0 0.0 32.0 0 0.0 1.0 0.0

891 rows × 21 columns


In [79]:
# Size of matplotlib figures that contain subplots
figsize_with_subplots = (10, 10)

# Set up a grid of plots
fig = plt.figure(figsize=figsize_with_subplots) 
fig_dims = (3, 2)

# Plot death and survival counts
plt.subplot2grid(fig_dims, (0, 0))
df_train['Survived'].value_counts().plot(kind='bar', 
                                         title='Death and Survival Counts',
                                         color='b',
                                         align='center')
print df_train['Survived'].value_counts()


0    549
1    342
Name: Survived, dtype: int64

In [83]:
# Plot Pclass counts
plt.subplot2grid(fig_dims, (0, 1))
df_train['Pclass'].value_counts().plot(kind='bar', 
                                       title='Passenger Class Counts')


Out[83]:
<matplotlib.axes._subplots.AxesSubplot at 0x1235eba10>

In [86]:
# Plot Sex counts
plt.subplot2grid(fig_dims, (1, 0))
df_train['Sex'].value_counts().plot(kind='bar', 
                                    title='Gender Counts')
plt.xticks(rotation=0)


Out[86]:
(array([0, 1]), <a list of 2 Text xticklabel objects>)

In [87]:
# Plot Embarked counts
plt.subplot2grid(fig_dims, (1, 1))
df_train['Embarked'].value_counts().plot(kind='bar', 
                                         title='Ports of Embarkation Counts')


Out[87]:
<matplotlib.axes._subplots.AxesSubplot at 0x123efa650>

In [98]:
# Plot the Age histogram
plt.subplot2grid(fig_dims, (2, 0))
df_train['Age'].hist()
plt.title('Age Histogram')


Out[98]:
<matplotlib.text.Text at 0x12470bed0>

In [121]:
# Get the unique values of Embarked and its maximum
family_sizes = (df_train.FamilySize)
family_size_max = max(family_sizes)

df1 = df_train[df_train['Survived'] == 0]['FamilySize']
df2 = df_train[df_train['Survived'] == 1]['FamilySize']
plt.hist([df1, df2], 
         bins=family_size_max + 1, 
         range=(0, family_size_max), 
         stacked= False)
plt.legend(('Died', 'Survived'), loc='best')
plt.title('Survivors by Family Size')


Out[121]:
<matplotlib.text.Text at 0x123754dd0>

In [126]:
pclass_xt = pd.crosstab(df_train['Pclass'], df_train['Survived'])
pclass_xt_pct = pclass_xt.div(pclass_xt.sum(1).astype(float), axis=0)
pclass_xt_pct


Out[126]:
Survived 0 1
Pclass
1 0.370370 0.629630
2 0.527174 0.472826
3 0.757637 0.242363

In [127]:
pclass_xt_pct.plot(kind='bar', 
                   stacked=True, 
                   title='Survival Rate by Passenger Classes')
plt.xlabel('Passenger Class')
plt.ylabel('Survival Rate')


Out[127]:
<matplotlib.text.Text at 0x1239d6850>

In [128]:
# Plot survival rate by Sex
females_df = df_train[df_train['Sex'] == 'female']
females_xt = pd.crosstab(females_df['Pclass'], df_train['Survived'])
females_xt_pct = females_xt.div(females_xt.sum(1).astype(float), axis=0)
females_xt_pct.plot(kind='bar', 
                    stacked=True, 
                    title='Female Survival Rate by Passenger Class')
plt.xlabel('Passenger Class')
plt.ylabel('Survival Rate')


Out[128]:
<matplotlib.text.Text at 0x124c8ec90>

In [130]:
#Plot survival rate by Pclass
males_df = df_train[df_train['Sex'] == 'male']
males_xt = pd.crosstab(males_df['Pclass'], df_train['Survived'])
males_xt_pct = males_xt.div(males_xt.sum(1).astype(float), axis=0)
males_xt_pct.plot(kind='bar', 
                  stacked=True, 
                  title='Male Survival Rate by Passenger Class')
plt.xlabel('Passenger Class')
plt.ylabel('Survival Rate')


Out[130]:
<matplotlib.text.Text at 0x124ebe790>

In [143]:
# Set up a grid of plots
fig, axes = plt.subplots(2, 1, figsize=figsize_with_subplots)


# Histogram of AgeFill segmented by Survived
df1 = df_train[df_train['Survived'] == 0]['Age']
df2 = df_train[df_train['Survived'] == 1]['Age']
max_age = max(df_train['AgeFill'])

axes[1].hist([df1, df2], 
             bins=max_age/5, 
             range=(1, max_age), 
             stacked=True)
axes[1].legend(('Died', 'Survived'), loc='best')
axes[1].set_title('Survivors by Age Groups Histogram')
axes[1].set_xlabel('Age')
axes[1].set_ylabel('Count')

# Scatter plot Survived and AgeFill
axes[0].scatter(df_train['Survived'], df_train['AgeFill'])
axes[0].set_title('Survivors by Age Plot')
axes[0].set_xlabel('Survived')
axes[0].set_ylabel('Age')


Out[143]:
<matplotlib.text.Text at 0x125421ed0>

In [153]:
# Get the unique values of Pclass:
passenger_classes = np.sort(df_train['Pclass'].unique())

for pclass in passenger_classes:
    df_train.AgeFill[df_train.Pclass == pclass].plot(kind='kde')
plt.title('Age Density Plot by Passenger Class')
plt.xlabel('Age')
plt.ylabel('Density')
plt.legend(('1st Class', '2nd Class', '3rd Class'), loc='best')


Out[153]:
<matplotlib.legend.Legend at 0x12ce5c9d0>

In [157]:
df_train['FareRound'] = df_train['Fare'].round()

In [171]:
df1 = df_train[df_train['Survived'] == 0]['FareRound']
df2 = df_train[df_train['Survived'] == 1]['FareRound']
max_fare = max(df_train['FareRound'])
print max_fare

df_train['FareRound'][df_train['Survived'] == 0].plot(kind='kde')
df_train['FareRound'][df_train['Survived'] == 1].plot(kind='kde')

plt.legend(('Fare', 'Survived'), loc='best')
plt.title('Survivors by Fare Groups Histogram')


512.0
Out[171]:
<matplotlib.text.Text at 0x12d8c8ed0>

In [ ]: