In [2]:
import pandas as pd
In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Load the passenger table from the CSV file that sits next to the notebook.
df = pd.read_csv(
    filepath_or_buffer="titanic.csv",
    encoding="UTF-8",
)
In [5]:
# Preview the first rows of the loaded table as a quick sanity check.
df.head()
Out[5]:
In [6]:
# List the column labels available in the table.
df.columns
Out[6]:
In [7]:
# How many men and how many women were there?
# Count the rows for each distinct value of the 'Sex' column.
sex_column = df['Sex']
sex_column.value_counts()
Out[7]:
In [8]:
# What was the average age of the Titanic passenger?
# Take the arithmetic mean of the 'Age' column.
ages = df['Age']
ages.mean()
Out[8]:
In [9]:
# How many passengers were in first class? What about second and third?
# Count the rows for each distinct value of the 'Pclass' column.
ticket_class = df['Pclass']
ticket_class.value_counts()
Out[9]:
In [10]:
# How many women survived?
# Summing 'Survived' within each 'Sex' group gives one total per sex;
# the answer is the entry for women in the result.
df['Survived'].groupby(df['Sex']).sum()
Out[10]:
In [18]:
# Of those who survived, how many were in first class? How many were in third class?
# Sum 'Survived' within each 'Pclass' group to get one total per class.
df['Survived'].groupby(df['Pclass']).sum()
Out[18]:
In [20]:
# Of those who survived, how many were in first class? How many were in third class?
# Same per-class totals as a plain {class: total} dictionary.
survived_flag = df['Survived']
classes = survived_flag.groupby(df['Pclass']).sum()
classes.to_dict()
Out[20]:
In [13]:
# Horizontal bar chart of the summed 'Survived' column for each passenger class.
survivors_per_class = df.groupby('Pclass')['Survived'].sum()
survivors_per_class.plot.barh()
Out[13]:
In [62]:
# One chart instead of one per class: calling .plot() on a GroupBy draws a
# separate figure for every group, and a histogram of 'Pclass' inside a single
# class is just one constant bar. Cross-tabulating class against survival gives
# a small table (rows = Pclass, columns = Survived) that plots as a single
# grouped bar chart.
survival_by_class = pd.crosstab(df['Pclass'], df['Survived'])
survival_by_class.plot(kind='bar', title='Passengers by class and survival')
Out[62]:
In [66]:
# Of those who died, what percentage were men? Can you make a chart?
# Other cells sum 'Survived' to count survivors, so 0 is taken to mean "died"
# (assumes the column is a 0/1 flag - confirm against the data).
died_by_sex = df.loc[df['Survived'] == 0, 'Sex'].value_counts()
# The pie chart labels each slice with its share of the deaths, so the
# percentage for men can be read directly off the figure.
died_by_sex.plot(kind='pie', autopct='%.1f%%', title='Passengers who died, by sex')
In [67]:
# What was the average age of those who died?
# Other cells sum 'Survived' to count survivors, so 0 is taken to mean "died"
# (assumes the column is a 0/1 flag - confirm against the data).
df.loc[df['Survived'] == 0, 'Age'].mean()
In [71]:
# What was the lowest fare paid? What about the highest?
# This cell reports the minimum of the 'Fare' column; the maximum is computed in the following cell.
df['Fare'].min()
Out[71]:
In [73]:
# Highest fare paid: the maximum of the 'Fare' column.
df['Fare'].max()
Out[73]:
In [82]:
# Can you create a chart of all the fares paid?
# Histogram of the 'Fare' column, drawn through the DataFrame plot accessor.
df.plot.hist(y='Fare')
Out[82]:
In [91]:
# Who was the youngest passenger and did he or she survive?
# keep='all' returns every row that shares the minimum age, so a tie is not
# silently reduced to one arbitrary passenger (which sort + head(1) would do).
# Rows with a missing age are not considered.
df.nsmallest(1, 'Age', keep='all')
Out[91]:
In [93]:
# Who paid the most expensive ticket and did he or she survive?
# keep='all' returns every row that shares the maximum fare, so passengers
# tied for the top fare are all listed instead of one arbitrary row
# (which sort + head(1) would give).
df.nlargest(1, 'Fare', keep='all')
Out[93]:
In [94]:
# Who were the five oldest passengers and did they survive?
# Order the table from oldest to youngest, then keep the first five rows.
oldest_first = df.sort_values(by='Age', ascending=False)
oldest_first.iloc[:5]
Out[94]:
In [ ]: