In [1]:
import pandas as pd
from pandas import Series, DataFrame
In [2]:
# Load the training set from a CSV in the working directory.
titanic_df = pd.read_csv(filepath_or_buffer='train.csv')
In [4]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [7]:
# Passenger counts by sex.
# NOTE(review): newer seaborn releases renamed factorplot to catplot — confirm
# the installed seaborn version before re-running.
sns.factorplot(x='Sex', data=titanic_df, kind="count")
Out[7]:
In [8]:
# Passenger counts by sex, with one bar per passenger class.
sns.factorplot(
    x='Sex',
    hue='Pclass',
    data=titanic_df,
    kind="count",
)
Out[8]:
In [9]:
# Same counts, grouped the other way round: class on the axis, sex as the hue.
sns.factorplot(
    x='Pclass',
    hue='Sex',
    data=titanic_df,
    kind="count",
)
Out[9]:
In [11]:
def male_female_child(passenger, child_age=16):
    """Classify a passenger as 'child' or by their sex.

    Parameters
    ----------
    passenger : sequence of (age, sex)
        Two-item row; it is unpacked positionally as age, then sex.
    child_age : number, optional
        Ages strictly below this cutoff are labelled 'child' (default 16).

    Returns
    -------
    'child' when ``age < child_age``; otherwise the sex value, unchanged.
    """
    age, sex = passenger
    # A NaN age compares False against the cutoff, so passengers with a
    # missing age fall through to their sex label.
    return 'child' if age < child_age else sex
In [12]:
# Derive a 'person' label for every row from its (Age, Sex) pair.
titanic_df['person'] = (
    titanic_df[['Age', 'Sex']]
    .apply(male_female_child, axis=1)
)
In [13]:
# Show the first ten rows, including the new 'person' column.
titanic_df.head(10)
Out[13]:
In [14]:
# Passenger counts per class, split into male / female / child.
sns.factorplot(
    x='Pclass',
    hue='person',
    data=titanic_df,
    kind="count",
)
Out[14]:
In [16]:
# Age distribution as a 70-bin histogram.
titanic_df.Age.hist(bins=70)
Out[16]:
In [17]:
# Average passenger age.
titanic_df.Age.mean()
Out[17]:
In [18]:
# How many passengers fall into each 'person' label.
titanic_df.person.value_counts()
Out[18]:
In [19]:
# Age density curves, one per sex, on a single wide facet.
# The x-axis runs from 0 up to the largest age in the data.
oldest = titanic_df['Age'].max()

fig = sns.FacetGrid(data=titanic_df, hue='Sex', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[19]:
In [21]:
# Age density curves, one per 'person' label (male / female / child).
# The x-axis runs from 0 up to the largest age in the data.
oldest = titanic_df['Age'].max()

fig = sns.FacetGrid(data=titanic_df, hue='person', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[21]:
In [23]:
# Age density curves, one per passenger class.
# The x-axis runs from 0 up to the largest age in the data.
oldest = titanic_df['Age'].max()

fig = sns.FacetGrid(data=titanic_df, hue='Pclass', aspect=4)
fig.map(sns.kdeplot, 'Age', shade=True)
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[23]:
In [24]:
# Keep only the passengers that have a Cabin value recorded.
deck = titanic_df.Cabin.dropna()
In [25]:
# Take the first character of every cabin value in `deck`.
levels = [cabin[0] for cabin in deck]
In [26]:
# Display the extracted first characters of the cabin values.
levels
Out[26]:
In [27]:
# Wrap the list of first characters in a single-column frame for plotting.
cabin_df = DataFrame(data=levels)
In [29]:
# Name the single column so it can be referenced by the plot below.
cabin_df.columns = ['Cabin']

# Count of passengers per cabin letter.
sns.factorplot(
    x='Cabin',
    data=cabin_df,
    kind='count',
    palette='winter_d',
)
Out[29]:
In [30]:
# Drop the rows whose cabin letter is 'T', then redraw the counts.
cabin_df = cabin_df.loc[cabin_df['Cabin'] != 'T']
sns.factorplot(
    x='Cabin',
    data=cabin_df,
    kind='count',
    palette='winter_d',
)
Out[30]:
In [33]:
# Passenger counts per embarkation code (fixed order C, Q, S), split by class.
sns.factorplot(
    x='Embarked',
    hue='Pclass',
    data=titanic_df,
    kind='count',
    order=['C', 'Q', 'S'],
)
Out[33]:
In [34]:
# Sum of the SibSp and Parch columns per passenger.
titanic_df['Alone'] = titanic_df['SibSp'] + titanic_df['Parch']
In [35]:
# Turn the SibSp + Parch total into a two-value label.
# The label is recomputed from the source columns and assigned to the column in
# one step. Writing through titanic_df['Alone'].loc[...] is chained indexing,
# which may modify a temporary copy instead of the frame. Recomputing also keeps
# this cell re-runnable: comparing already-written string labels with 0 would
# raise.
# NOTE(review): assumes SibSp and Parch have no missing values; a NaN total
# would be labelled 'Alone' here — confirm against the data.
family_size = titanic_df['SibSp'] + titanic_df['Parch']
titanic_df['Alone'] = np.where(family_size > 0, 'With Family', 'Alone')
In [38]:
# Passenger counts for each value of the 'Alone' column.
sns.factorplot(
    x='Alone',
    data=titanic_df,
    kind='count',
    palette='Blues',
)
Out[38]:
In [39]:
# Readable version of the 0/1 Survived flag, then a count of each outcome.
titanic_df['Survivor'] = titanic_df['Survived'].map({0: 'no', 1: 'yes'})
sns.factorplot(
    x='Survivor',
    data=titanic_df,
    kind='count',
    palette='Set1',
)
Out[39]:
In [41]:
# Survived plotted against passenger class.
sns.factorplot(
    x='Pclass',
    y='Survived',
    data=titanic_df,
    palette='Set1',
)
Out[41]:
In [42]:
# Survived plotted against sex.
sns.factorplot(
    x='Sex',
    y='Survived',
    data=titanic_df,
    palette='Set1',
)
Out[42]:
In [44]:
# Survived against passenger class, one series per 'person' label.
sns.factorplot(
    x='Pclass',
    y='Survived',
    hue='person',
    data=titanic_df,
    palette='Set1',
)
Out[44]:
In [45]:
# Regression plot of Survived on Age.
sns.lmplot(x='Age', y='Survived', data=titanic_df)
Out[45]:
In [46]:
# Regression of Survived on Age, fitted separately per passenger class.
sns.lmplot(
    x='Age',
    y='Survived',
    hue='Pclass',
    data=titanic_df,
)
Out[46]:
In [47]:
# Age values 10, 20, ..., 80, handed to lmplot as x_bins.
generations = list(range(10, 90, 10))

# Same per-class regression, with Age binned at those values.
sns.lmplot(
    x='Age',
    y='Survived',
    hue='Pclass',
    data=titanic_df,
    x_bins=generations,
)
Out[47]:
In [48]:
# Binned regression of Survived on Age, fitted separately per sex.
sns.lmplot(
    x='Age',
    y='Survived',
    hue='Sex',
    data=titanic_df,
    x_bins=generations,
)
Out[48]:
In [49]:
# Binned regression of Survived on Age, fitted per value of 'Alone'.
sns.lmplot(
    x='Age',
    y='Survived',
    hue='Alone',
    data=titanic_df,
    x_bins=generations,
)
Out[49]:
In [52]:
# Survived against passenger class, one series per value of 'Alone'.
sns.factorplot(
    x='Pclass',
    y='Survived',
    hue='Alone',
    data=titanic_df,
    palette='Set1',
)
Out[52]:
In [53]:
# Survived against sex, one series per value of 'Alone'.
sns.factorplot(
    x='Sex',
    y='Survived',
    hue='Alone',
    data=titanic_df,
    palette='Set1',
)
Out[53]:
In [ ]: