In [1]:
import pandas as pd
from pandas import Series, DataFrame
In [2]:
# Read train.csv (path is relative to the kernel's working directory) into the
# DataFrame that the rest of the notebook works on.
titanic_df = pd.read_csv('train.csv')
In [4]:
# First look at the data: show the first five rows.
titanic_df.head(n=5)
Out[4]:
In [5]:
# Summarise the frame: per-column dtype and non-null count.
titanic_df.info()
In [6]:
import numpy as np
In [7]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [9]:
# Passenger count per Sex value.
# catplot is the current name of factorplot (renamed in seaborn 0.9); the
# variables are passed by keyword so the call does not rely on argument order.
sns.catplot(x='Sex', data=titanic_df, kind='count')
Out[9]:
In [11]:
# Passenger count per Sex value, with one bar per Pclass inside each group.
# catplot is the current name of factorplot (renamed in seaborn 0.9); the
# variables are passed by keyword so the call does not rely on argument order.
sns.catplot(x='Sex', hue='Pclass', data=titanic_df, kind='count')
Out[11]:
In [13]:
# Passenger count per Pclass, with one bar per Sex value inside each group.
# catplot is the current name of factorplot (renamed in seaborn 0.9); the
# variables are passed by keyword so the call does not rely on argument order.
sns.catplot(x='Pclass', hue='Sex', data=titanic_df, kind='count')
Out[13]:
In [15]:
def male_female_child(passenger, child_age=16):
    """Label a passenger as 'child' or by their recorded sex.

    Parameters
    ----------
    passenger : iterable of length 2
        An ``(age, sex)`` pair, e.g. one row of ``titanic_df[['Age', 'Sex']]``.
    child_age : number, default 16
        Passengers strictly younger than this are labelled ``'child'``.

    Returns
    -------
    ``'child'`` when ``age < child_age``, otherwise ``sex`` unchanged.
    A missing age (NaN) never compares below the threshold, so such
    passengers keep their sex.
    """
    age, sex = passenger
    return 'child' if age < child_age else sex
In [16]:
# Build the 'person' column row by row: each (Age, Sex) pair is handed to
# male_female_child, which returns 'child' or the row's Sex value.
titanic_df['person'] = (
    titanic_df.loc[:, ['Age', 'Sex']]
    .apply(male_female_child, axis='columns')
)
In [17]:
# Show the first ten rows to check the new 'person' column.
titanic_df.head(10)
Out[17]:
In [18]:
# Passenger count per Pclass, split by the 'person' label.
# catplot is the current name of factorplot (renamed in seaborn 0.9); the
# variables are passed by keyword so the call does not rely on argument order.
sns.catplot(x='Pclass', hue='person', data=titanic_df, kind='count')
Out[18]:
In [19]:
# Histogram of the Age column using 70 bins.
titanic_df['Age'].hist(bins=70)
Out[19]:
In [20]:
# Mean of the Age column (pandas skips missing values by default).
titanic_df['Age'].mean()
Out[20]:
In [25]:
# Number of passengers carrying each 'person' label.
titanic_df['person'].value_counts()
Out[25]:
In [26]:
# Number of passengers per Sex value, for comparison with the 'person' counts.
titanic_df['Sex'].value_counts()
Out[26]:
In [27]:
# Kernel density estimate of Age, one filled curve per Sex value.
# `fill=True` is the current spelling of kdeplot's deprecated `shade=True`.
oldest = titanic_df['Age'].max()
fig = sns.FacetGrid(titanic_df, hue='Sex', aspect=4)
fig.map(sns.kdeplot, 'Age', fill=True)
# Limit the x-axis to 0..max(Age); the smoothed curves can extend beyond
# the observed ages.
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[27]:
In [28]:
# Kernel density estimate of Age, one filled curve per 'person' label.
# `fill=True` is the current spelling of kdeplot's deprecated `shade=True`.
oldest = titanic_df['Age'].max()
fig = sns.FacetGrid(titanic_df, hue='person', aspect=4)
fig.map(sns.kdeplot, 'Age', fill=True)
# Limit the x-axis to 0..max(Age); the smoothed curves can extend beyond
# the observed ages.
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[28]:
In [29]:
# Kernel density estimate of Age, one filled curve per Pclass value.
# `fill=True` is the current spelling of kdeplot's deprecated `shade=True`.
oldest = titanic_df['Age'].max()
fig = sns.FacetGrid(titanic_df, hue='Pclass', aspect=4)
fig.map(sns.kdeplot, 'Age', fill=True)
# Limit the x-axis to 0..max(Age); the smoothed curves can extend beyond
# the observed ages.
fig.set(xlim=(0, oldest))
fig.add_legend()
Out[29]:
In [30]:
# Preview the first five rows again before working with the Cabin column.
titanic_df.head(n=5)
Out[30]:
In [31]:
# Keep only the Cabin entries that are present; rows with a missing Cabin
# are left out of `deck`.
deck = titanic_df.loc[titanic_df['Cabin'].notnull(), 'Cabin']
In [33]:
# Show the first five non-missing Cabin values.
deck.head(n=5)
Out[33]:
In [35]:
# Take the first character of every non-missing Cabin value.
levels = [cabin[0] for cabin in deck]
# Build the frame with its column already named instead of renaming afterwards.
cabin_df = DataFrame({'Cabin': levels})
# Count of passengers per first-character value.
# catplot is the current name of factorplot (renamed in seaborn 0.9).
sns.catplot(x='Cabin', data=cabin_df, kind='count', palette='winter_d')
Out[35]:
In [38]:
# Remove the rows whose Cabin letter is 'T', then redraw the counts.
cabin_df = cabin_df[cabin_df['Cabin'] != 'T']
# catplot is the current name of factorplot (renamed in seaborn 0.9).
sns.catplot(x='Cabin', data=cabin_df, kind='count', palette='summer')
Out[38]:
In [39]:
# Preview the first five rows again before working with the Embarked column.
titanic_df.head(n=5)
Out[39]:
In [45]:
# Passenger count per Embarked value (fixed order C, Q, S), split by Pclass.
# catplot is the current name of factorplot (renamed in seaborn 0.9), and the
# category order is given with `order`; the old `x_order` keyword is not accepted.
sns.catplot(x='Embarked', hue='Pclass', data=titanic_df, kind='count',
            order=['C', 'Q', 'S'])
Out[45]:
In [46]:
# Preview the first five rows again before combining SibSp and Parch.
titanic_df.head(n=5)
Out[46]:
In [47]:
# Store SibSp + Parch per passenger in a new 'Alone' column; at this point the
# column holds the numeric sum, not a label.
titanic_df['Alone'] = titanic_df['SibSp'] + titanic_df['Parch']
In [48]:
# Display the 'Alone' column as it currently stands.
titanic_df['Alone']
Out[48]:
In [50]:
# Turn the SibSp + Parch count into a two-level label.
# The column is rebuilt in a single assignment rather than through
# titanic_df['Alone'].loc[mask] = ...: that form is chained indexing, which
# writes to an intermediate object (SettingWithCopyWarning, and no effect on
# the frame under pandas copy-on-write). Deriving the mask from SibSp and
# Parch also keeps the cell safe to re-run after 'Alone' already holds strings.
has_family = (titanic_df['SibSp'] + titanic_df['Parch']) > 0
titanic_df['Alone'] = has_family.map({True: 'With family', False: 'Alone'})
In [52]:
# Readable version of the 0/1 Survived flag: 0 -> 'no', 1 -> 'yes'.
titanic_df['Survivor'] = titanic_df['Survived'].map({0: 'no', 1: 'yes'})
# Count of passengers per Survivor label.
# catplot is the current name of factorplot (renamed in seaborn 0.9).
sns.catplot(x='Survivor', data=titanic_df, kind='count')
Out[52]:
In [54]:
# Mean of Survived per Pclass, one line per 'person' label.
# catplot is the current name of factorplot (renamed in seaborn 0.9).
# factorplot drew a point plot by default while catplot defaults to a strip
# plot, so kind='point' is spelled out to keep the same figure.
sns.catplot(x='Pclass', y='Survived', hue='person', data=titanic_df,
            kind='point')
Out[54]:
In [56]:
# Linear fit of Survived against Age.
# x and y are passed by keyword: recent seaborn releases no longer accept
# them as positional arguments.
sns.lmplot(x='Age', y='Survived', data=titanic_df)
Out[56]:
In [58]:
# Linear fit of Survived against Age, one fit per Pclass value.
# x and y are passed by keyword: recent seaborn releases no longer accept
# them as positional arguments.
sns.lmplot(x='Age', y='Survived', hue='Pclass', data=titanic_df)
Out[58]:
In [ ]: