In [94]:
# Data-handling libraries used throughout the notebook.
import pandas as pd
import numpy as np

# NOTE(review): %pylab pulls numpy and matplotlib names into the interactive
# namespace and enables inline figures. Later cells call figsize() without
# importing it, presumably relying on this magic -- confirm before swapping it
# for `%matplotlib inline` plus explicit imports.
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [95]:
# Default figure size (width, height in inches) for every plot below.
# Set through matplotlib's rcParams rather than the figsize() helper, which
# only exists when %pylab has populated the namespace; `plt` is available
# both under %pylab and under a plain `import matplotlib.pyplot as plt`.
plt.rcParams['figure.figsize'] = [7, 4]

In [96]:
# Load the passenger table from the CSV file next to this notebook.
df = pd.read_csv(filepath_or_buffer='titanic.csv')

In [97]:
# Preview the first five rows (head() defaults to n=5).
df.head()


Out[97]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35 0 0 373450 8.0500 NaN S

In [98]:
# Show the Survived column; pandas elides the middle of a long Series.
df.loc[:, 'Survived']


Out[98]:
0     0
1     1
2     1
3     1
4     0
5     0
6     0
7     0
8     1
9     1
10    1
11    1
12    0
13    0
14    0
...
876    0
877    0
878    0
879    1
880    1
881    0
882    0
883    0
884    0
885    0
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [99]:
# Number of rows (passengers) in the table.
df.shape[0]


Out[99]:
891

In [100]:
# How many passengers fall under each value of the Sex column.
df.Sex.value_counts()


Out[100]:
male      577
female    314
dtype: int64

In [101]:
# First ten entries of the Survived column.
df['Survived'].iloc[:10]


Out[101]:
0    0
1    1
2    1
3    1
4    0
5    0
6    0
7    0
8    1
9    1
Name: Survived, dtype: int64

In [102]:
# Bar chart of passenger counts per sex. The title lets the figure stand on
# its own; the call still returns the Axes as the cell output.
df['Sex'].value_counts().plot(kind='bar', title='Number of passengers by sex')


Out[102]:
<matplotlib.axes.AxesSubplot at 0x10cca2050>

In [103]:
# Keep only the rows whose Survived flag equals 1.
df_survived = df.loc[df['Survived'] == 1]

In [104]:
# Preview the first five survivor rows (head() defaults to n=5).
df_survived.head()


Out[104]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0 113803 53.1000 C123 S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14 1 0 237736 30.0708 NaN C

In [105]:
# Rows with Age strictly below 10. Missing ages compare as False and are
# therefore left out.
# NOTE(review): df0 is not used by any later cell visible here.
df0 = df.loc[df['Age'] < 10]

In [106]:
# Scratch check of the built-in len() on a literal list; it does not touch
# the passenger data.
len([1, 4, 5, 6, 6])


Out[106]:
5

In [107]:
# Survival rate per ticket class: the share of rows with Survived == 1 inside
# each Pclass group. Taking the per-group mean of the 0/1 indicator gives the
# same ratio as "survivor count / total count", but yields 0.0 (not NaN) for
# a class with no survivors and does not depend on df_survived having been
# built in another cell. The float cast keeps the mean fractional everywhere.
(
    (df['Survived'] == 1)
    .astype(float)
    .groupby(df['Pclass'])
    .mean()
    .plot(kind='bar', title='Survival rate by passenger class')
)


Out[107]:
<matplotlib.axes.AxesSubplot at 0x10cca9450>

In [108]:
# Survival rate per sex: the share of rows with Survived == 1 inside each Sex
# group. Taking the per-group mean of the 0/1 indicator gives the same ratio
# as "survivor count / total count", but yields 0.0 (not NaN) for a group
# with no survivors and does not depend on df_survived having been built in
# another cell. The float cast keeps the mean fractional everywhere.
(
    (df['Survived'] == 1)
    .astype(float)
    .groupby(df['Sex'])
    .mean()
    .plot(kind='bar', title='Survival rate by sex')
)


Out[108]:
<matplotlib.axes.AxesSubplot at 0x10ce8fd10>

In [109]:
# Count of passengers at each distinct recorded age. value_counts() orders by
# frequency, which scrambles the x-axis; sorting the index puts the bars in
# ascending age order so the chart reads as a distribution.
df['Age'].value_counts().sort_index().plot(
    kind='bar', title='Number of passengers per recorded age'
)


Out[109]:
<matplotlib.axes.AxesSubplot at 0x10ce46f10>

In [110]:
# Histogram of passenger ages in 30 bins, with axis labels so the figure can
# be read without the surrounding code.
ax = df['Age'].hist(bins=30)
ax.set_xlabel('Age')
ax.set_ylabel('Number of passengers')
# Finish on the Axes itself so the cell output is the same as before.
ax


Out[110]:
<matplotlib.axes.AxesSubplot at 0x10d61a410>

In [111]:
# Kernel density estimate of age for all passengers versus survivors, drawn
# on the same Axes. Missing ages are dropped first because the KDE cannot use
# them. Labels plus a legend make the two curves distinguishable.
df['Age'].dropna().plot(
    kind='kde', label='All passengers', legend=True,
    title='Age distribution: all passengers vs. survivors'
)
df_survived['Age'].dropna().plot(kind='kde', label='Survivors', legend=True)


Out[111]:
<matplotlib.axes.AxesSubplot at 0x10d698750>

In [112]:
# Same age-density comparison restricted to male passengers: all men versus
# the men who survived. Rows and the Age column are selected in one .loc
# call instead of chained indexing, and the curves get labels and a legend
# so they can be told apart.
df.loc[df['Sex'] == 'male', 'Age'].dropna().plot(
    kind='kde', label='All men', legend=True,
    title='Age distribution of male passengers: all vs. survivors'
)
df_survived.loc[df_survived['Sex'] == 'male', 'Age'].dropna().plot(
    kind='kde', label='Men who survived', legend=True
)


Out[112]:
<matplotlib.axes.AxesSubplot at 0x10dc09890>

In [112]:


In [112]: