In [1]:
import pandas as pd
from pandas import Series, DataFrame

In [2]:
titanic_df = pd.read_csv('train.csv')

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [7]:
sns.factorplot('Sex', data=titanic_df, kind="count")


Out[7]:
<seaborn.axisgrid.FacetGrid at 0x119ff6fd0>

In [8]:
sns.factorplot('Sex', data=titanic_df, kind="count", hue='Pclass')


Out[8]:
<seaborn.axisgrid.FacetGrid at 0x117ad7ed0>

In [9]:
sns.factorplot('Pclass', data=titanic_df, kind="count", hue='Sex')


Out[9]:
<seaborn.axisgrid.FacetGrid at 0x11a932850>

In [11]:
def male_female_child(passenger):
    age,sex = passenger
    if age < 16:
        return 'child'
    else:
        return sex

In [12]:
titanic_df['person'] = titanic_df[['Age', 'Sex']].apply(male_female_child, axis=1)

In [13]:
titanic_df[0:10]


Out[13]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked person
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S male
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C female
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S female
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S female
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S male
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q male
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S male
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S child
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S female
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C child

In [14]:
sns.factorplot('Pclass', data=titanic_df, kind="count", hue='person')


Out[14]:
<seaborn.axisgrid.FacetGrid at 0x11ab01790>

In [16]:
titanic_df['Age'].hist(bins=70)


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x11aed3610>

In [17]:
titanic_df['Age'].mean()


Out[17]:
29.69911764705882

In [18]:
titanic_df['person'].value_counts()


Out[18]:
male      537
female    271
child      83
Name: person, dtype: int64

In [19]:
fig = sns.FacetGrid(titanic_df, hue='Sex', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)

oldest = titanic_df['Age'].max()
fig.set(xlim=(0,oldest))

fig.add_legend()


Out[19]:
<seaborn.axisgrid.FacetGrid at 0x11b022990>

In [21]:
fig = sns.FacetGrid(titanic_df, hue='person', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)

oldest = titanic_df['Age'].max()
fig.set(xlim=(0,oldest))

fig.add_legend()


Out[21]:
<seaborn.axisgrid.FacetGrid at 0x11b9e1350>

In [23]:
fig = sns.FacetGrid(titanic_df, hue='Pclass', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)

oldest = titanic_df['Age'].max()
fig.set(xlim=(0,oldest))

fig.add_legend()


Out[23]:
<seaborn.axisgrid.FacetGrid at 0x11b9e1d90>

In [24]:
deck = titanic_df['Cabin'].dropna()

In [25]:
levels = []

for level in deck:
    levels.append(level[0])

In [26]:
levels


Out[26]:
['C',
 'C',
 'E',
 'G',
 'C',
 'D',
 'A',
 'C',
 'B',
 'D',
 'B',
 'C',
 'B',
 'C',
 'F',
 'F',
 'C',
 'E',
 'A',
 'D',
 'D',
 'C',
 'B',
 'E',
 'D',
 'F',
 'D',
 'C',
 'B',
 'F',
 'C',
 'E',
 'B',
 'A',
 'C',
 'F',
 'A',
 'F',
 'B',
 'B',
 'G',
 'A',
 'D',
 'D',
 'C',
 'C',
 'C',
 'D',
 'G',
 'C',
 'B',
 'E',
 'B',
 'C',
 'C',
 'C',
 'D',
 'A',
 'B',
 'D',
 'C',
 'C',
 'B',
 'E',
 'C',
 'C',
 'E',
 'C',
 'B',
 'C',
 'E',
 'C',
 'D',
 'B',
 'C',
 'C',
 'C',
 'E',
 'T',
 'F',
 'C',
 'F',
 'C',
 'E',
 'D',
 'B',
 'E',
 'C',
 'B',
 'D',
 'G',
 'C',
 'E',
 'C',
 'E',
 'B',
 'C',
 'A',
 'C',
 'C',
 'C',
 'E',
 'D',
 'E',
 'E',
 'D',
 'A',
 'B',
 'C',
 'B',
 'C',
 'D',
 'C',
 'B',
 'C',
 'E',
 'D',
 'F',
 'B',
 'B',
 'C',
 'B',
 'B',
 'B',
 'C',
 'C',
 'A',
 'E',
 'C',
 'E',
 'E',
 'C',
 'A',
 'E',
 'B',
 'D',
 'A',
 'C',
 'F',
 'D',
 'D',
 'D',
 'A',
 'B',
 'B',
 'D',
 'A',
 'D',
 'E',
 'C',
 'B',
 'B',
 'D',
 'B',
 'B',
 'C',
 'F',
 'C',
 'E',
 'E',
 'C',
 'C',
 'C',
 'F',
 'C',
 'E',
 'E',
 'B',
 'B',
 'D',
 'C',
 'B',
 'B',
 'D',
 'E',
 'B',
 'B',
 'D',
 'E',
 'F',
 'B',
 'B',
 'D',
 'B',
 'D',
 'B',
 'A',
 'E',
 'B',
 'B',
 'E',
 'B',
 'E',
 'C',
 'C',
 'D',
 'E',
 'D',
 'A',
 'D',
 'B',
 'C',
 'B',
 'C']

In [27]:
cabin_df = DataFrame(levels)

In [29]:
cabin_df.columns = ['Cabin']
sns.factorplot('Cabin', data=cabin_df, palette = 'winter_d', kind='count')


Out[29]:
<seaborn.axisgrid.FacetGrid at 0x11c0bba90>

In [30]:
cabin_df = cabin_df[cabin_df.Cabin != 'T']
sns.factorplot('Cabin', data=cabin_df, palette = 'winter_d', kind='count')


Out[30]:
<seaborn.axisgrid.FacetGrid at 0x11bb06950>

In [33]:
sns.factorplot('Embarked', data=titanic_df, hue='Pclass', kind='count', order=['C', 'Q', 'S'])


Out[33]:
<seaborn.axisgrid.FacetGrid at 0x11c74cad0>

In [34]:
titanic_df['Alone'] = titanic_df.SibSp + titanic_df.Parch

In [35]:
titanic_df['Alone'].loc[titanic_df['Alone'] > 0] = 'With Family'
titanic_df['Alone'].loc[titanic_df['Alone'] == 0] = 'Alone'


/Users/andymeyers/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:132: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)

In [38]:
sns.factorplot('Alone', data=titanic_df, palette = 'Blues', kind='count')


Out[38]:
<seaborn.axisgrid.FacetGrid at 0x11c8a5450>

In [39]:
titanic_df['Survivor'] = titanic_df.Survived.map({0:'no', 1: 'yes'})

sns.factorplot('Survivor', data=titanic_df, palette = 'Set1', kind='count')


Out[39]:
<seaborn.axisgrid.FacetGrid at 0x11b506790>

In [41]:
sns.factorplot('Pclass', 'Survived', data=titanic_df, palette = 'Set1')


Out[41]:
<seaborn.axisgrid.FacetGrid at 0x11c09cad0>

In [42]:
sns.factorplot('Sex', 'Survived', data=titanic_df, palette = 'Set1')


Out[42]:
<seaborn.axisgrid.FacetGrid at 0x11cb21490>

In [44]:
sns.factorplot('Pclass', 'Survived', data=titanic_df, palette = 'Set1', hue='person')


Out[44]:
<seaborn.axisgrid.FacetGrid at 0x11d37d190>

In [45]:
sns.lmplot('Age', 'Survived', data=titanic_df)


Out[45]:
<seaborn.axisgrid.FacetGrid at 0x11d453390>

In [46]:
sns.lmplot('Age', 'Survived', data=titanic_df, hue='Pclass')


Out[46]:
<seaborn.axisgrid.FacetGrid at 0x11d85d710>

In [47]:
generations = [10, 20, 30, 40, 50, 60, 70, 80]
sns.lmplot('Age', 'Survived', data=titanic_df, hue='Pclass', x_bins=generations)


Out[47]:
<seaborn.axisgrid.FacetGrid at 0x11d85d050>

In [48]:
sns.lmplot('Age', 'Survived', data=titanic_df, hue='Sex', x_bins=generations)


Out[48]:
<seaborn.axisgrid.FacetGrid at 0x11dafb650>

In [49]:
sns.lmplot('Age', 'Survived', data=titanic_df, hue='Alone', x_bins=generations)


Out[49]:
<seaborn.axisgrid.FacetGrid at 0x11d0bebd0>

In [52]:
sns.factorplot('Pclass', 'Survived', data=titanic_df, palette = 'Set1', hue='Alone')


Out[52]:
<seaborn.axisgrid.FacetGrid at 0x11e716850>

In [53]:
sns.factorplot('Sex', 'Survived', data=titanic_df, palette = 'Set1', hue='Alone')


Out[53]:
<seaborn.axisgrid.FacetGrid at 0x11f277750>

In [ ]: