In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

%matplotlib inline

In [4]:
df = pd.read_csv("titanic.csv", encoding="UTF-8")

In [5]:
df.head()


Out[5]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

In [6]:
df.columns


Out[6]:
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

Questions:

  • How many men and how many women were there?
  • What was the average age of the Titanic passenger?
  • How many passengers were in first class? What about second and third?
  • How many women survived?
  • Of those who survived, how many were in first class? How many were in third class?
  • Of those who died, what percentage were men? Can you make a chart?
  • What was the average age of those who died?
  • What was the lowest fare paid? What about the highest?
  • Can you create a chart of all the fares paid?
  • Who was the youngest passenger and did he or she survive?
  • Who paid the most expensive ticket and did he or she survive?
  • Who were the five oldest passengers and did they survive?

In [7]:
#How many men and how many women were there?
df['Sex'].value_counts()


Out[7]:
male      577
female    314
Name: Sex, dtype: int64

In [8]:
#What was the average age of the Titanic passenger?
df['Age'].mean()


Out[8]:
29.69911764705882

In [9]:
#How many passengers were in first class? What about second and third?
df['Pclass'].value_counts()


Out[9]:
3    491
1    216
2    184
Name: Pclass, dtype: int64

In [10]:
#How many women survived?
df.groupby('Sex')['Survived'].sum()


Out[10]:
Sex
female    233
male      109
Name: Survived, dtype: int64

In [18]:
#Of those who survived, how many were in first class? How many were in third class?
df.groupby('Pclass')['Survived'].sum()


Out[18]:
Pclass
1    136
2     87
3    119
Name: Survived, dtype: int64

In [20]:
#Of those who survived, how many were in first class? How many were in third class?
classes = df.groupby('Pclass')['Survived'].sum()
classes.to_dict()


Out[20]:
{1: 136, 2: 87, 3: 119}

In [13]:
df.groupby('Pclass')['Survived'].sum().plot(kind='barh')


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x109b32f60>

In [62]:
df.groupby('Pclass').plot(kind='hist', x='Survived', y='Pclass')
#Question: How to create only one chart from this?


Out[62]:
Pclass
1    Axes(0.125,0.125;0.775x0.775)
2    Axes(0.125,0.125;0.775x0.775)
3    Axes(0.125,0.125;0.775x0.775)
dtype: object

In [66]:
#Of those who died, what percentage were men? Can you make a chart?

In [67]:
#What was the average age of those who died?

In [71]:
#What was the lowest fare paid? What about the highest?
df['Fare'].min()


Out[71]:
0.0

In [73]:
df['Fare'].max()


Out[73]:
512.32920000000001

In [82]:
#Can you create a chart of all the fares paid?
df.plot(kind='hist', y='Fare')


Out[82]:
<matplotlib.axes._subplots.AxesSubplot at 0x120714dd8>

In [91]:
#Who was the youngest passenger and did he or she survive?
df.sort_values('Age').head(1)


Out[91]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
803 804 1 3 Thomas, Master. Assad Alexander male 0.42 0 1 2625 8.5167 NaN C

In [93]:
# Who paid the most expensive ticket and did he or she survive?
df.sort_values('Fare', ascending=False).head(1)


Out[93]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
258 259 1 1 Ward, Miss. Anna female 35.0 0 0 PC 17755 512.3292 NaN C

In [94]:
#Who were the five oldest passengers and did they survive?
df.sort_values('Age', ascending=False).head(5)


Out[94]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
630 631 1 1 Barkworth, Mr. Algernon Henry Wilson male 80.0 0 0 27042 30.0000 A23 S
851 852 0 3 Svensson, Mr. Johan male 74.0 0 0 347060 7.7750 NaN S
493 494 0 1 Artagaveytia, Mr. Ramon male 71.0 0 0 PC 17609 49.5042 NaN C
96 97 0 1 Goldschmidt, Mr. George B male 71.0 0 0 PC 17754 34.6542 A5 C
116 117 0 3 Connors, Mr. Patrick male 70.5 0 0 370369 7.7500 NaN Q

In [ ]: