In [94]:
import pandas as pd
import numpy as np
%pylab inline
In [95]:
# Default size (width, height in inches) for every figure drawn below.
# `plt` is placed in the namespace by the `%pylab inline` magic in the first cell.
plt.rcParams['figure.figsize'] = [7, 4]
In [96]:
# Load the Titanic CSV; the path is resolved relative to the working directory.
DATA_PATH = 'titanic.csv'
df = pd.read_csv(DATA_PATH)
In [97]:
# Peek at the first rows of the frame (head() defaults to n=5).
df.head()
Out[97]:
In [98]:
# Display the 'Survived' column as a Series.
df.loc[:, 'Survived']
Out[98]:
In [99]:
# Number of rows in the loaded frame.
df.shape[0]
Out[99]:
In [100]:
# How often each distinct 'Sex' value occurs, most frequent first.
sex_counts = df['Sex'].value_counts()
sex_counts
Out[100]:
In [101]:
# First ten entries of the 'Survived' column.
df['Survived'].iloc[:10]
Out[101]:
In [102]:
# Bar chart of the number of rows under each 'Sex' value.
df['Sex'].value_counts().plot.bar()
Out[102]:
In [103]:
# Keep only the rows whose 'Survived' value equals 1.
is_survivor = df['Survived'].eq(1)
df_survived = df.loc[is_survivor]
In [104]:
# Peek at the first rows of the survivor subset (head() defaults to n=5).
df_survived.head()
Out[104]:
In [105]:
# Rows with Age strictly below 10. A missing Age fails the comparison,
# so those rows are excluded as well.
age_under_10 = df['Age'].lt(10)
df0 = df.loc[age_under_10]
In [106]:
# Scratch check: length of a five-element literal.
len((1, 4, 5, 6, 6))
Out[106]:
In [107]:
# Survival rate per passenger class: the share of rows in each 'Pclass'
# group whose 'Survived' value equals 1.
#
# Averaging the boolean flag within each group replaces the earlier
# count-over-count division. A class with no matching rows now shows 0
# instead of NaN, and neither a float cast nor index alignment between two
# separately computed Series is needed. Bars are ordered by class label.
survival_by_class = df['Survived'].eq(1).groupby(df['Pclass']).mean()
ax = survival_by_class.plot(kind='bar', title='Survival rate by passenger class')
ax.set_xlabel('Pclass')
ax.set_ylabel('Share survived');
Out[107]:
In [108]:
# Survival rate per sex: the share of rows in each 'Sex' group whose
# 'Survived' value equals 1.
#
# Averaging the boolean flag within each group replaces the earlier
# count-over-count division. A group with no matching rows now shows 0
# instead of NaN, and neither a float cast nor index alignment between two
# separately computed Series is needed. Bars are ordered by group label.
survival_by_sex = df['Survived'].eq(1).groupby(df['Sex']).mean()
ax = survival_by_sex.plot(kind='bar', title='Survival rate by sex')
ax.set_xlabel('Sex')
ax.set_ylabel('Share survived');
Out[108]:
In [109]:
# One bar per distinct Age value. value_counts() orders the bars by
# descending frequency, so the x-axis is NOT in age order.
age_counts = df['Age'].value_counts()
age_counts.plot.bar()
Out[109]:
In [110]:
# Histogram of the Age column split into 30 bins.
ages = df['Age']
ages.hist(bins=30)
Out[110]:
In [111]:
# Overlay the age density of every row with that of the survivor subset.
#
# Both curves are drawn on one explicit Axes and given distinct labels.
# Without labels each curve would be named after its column ('Age'), and
# with no legend the two lines could not be told apart.
ax = df['Age'].dropna().plot(kind='kde', label='All passengers')
df_survived['Age'].dropna().plot(kind='kde', ax=ax, label='Survivors')
ax.set_title('Age distribution')
ax.set_xlabel('Age')
ax.legend();
Out[111]:
In [112]:
# Same comparison restricted to rows where 'Sex' is 'male': age density of
# all such rows versus those in the survivor subset.
#
# Rows and column are selected in a single .loc call rather than chained
# indexing. Both curves share one explicit Axes and carry distinct labels,
# so the legend shows which line is which.
male_ages = df.loc[df['Sex'] == 'male', 'Age'].dropna()
male_survivor_ages = df_survived.loc[df_survived['Sex'] == 'male', 'Age'].dropna()

ax = male_ages.plot(kind='kde', label='All males')
male_survivor_ages.plot(kind='kde', ax=ax, label='Surviving males')
ax.set_title('Age distribution (male)')
ax.set_xlabel('Age')
ax.legend();
Out[112]:
In [112]:
In [112]: