In [1]:
import pandas as pd
In [2]:
# Load the CSV into a DataFrame; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='data/src/titanic_train.csv')
In [3]:
# Discard the three columns that the rest of the notebook does not use.
# Rebinding `df` to the frame returned by drop() (instead of mutating with
# inplace=True) keeps the step chainable and avoids in-place mutation of an
# object that other cells reference.
df = df.drop(columns=['Name', 'Ticket', 'Cabin'])
In [4]:
# Single-level index variant: a new frame keyed by the 'PassengerId' column.
df_single = df.set_index(keys='PassengerId')
In [5]:
# Preview the first five rows of the single-index frame.
print(df_single.head(n=5))
In [6]:
# Multi-level index variant: Sex > Pclass > Embarked > PassengerId, sorted by
# index so rows that share outer-level labels are adjacent.
index_levels = ['Sex', 'Pclass', 'Embarked', 'PassengerId']
df_multi = df.set_index(keys=index_levels).sort_index()
In [7]:
# Preview the first five rows of the sorted multi-index frame.
print(df_multi.head(n=5))
In [8]:
# Preview the last five rows of the sorted multi-index frame.
print(df_multi.tail(n=5))
In [9]:
# Column-wise mean over every row; index levels are not part of the columns,
# so only the remaining data columns are averaged.
multi_overall_mean = df_multi.mean()
print(multi_overall_mean)
In [10]:
# Column-wise mean over every row of the single-index frame.
# df_single still carries 'Sex' and 'Embarked' as ordinary columns (only
# 'PassengerId' was moved to the index), so restrict the reduction to numeric
# columns explicitly: pandas >= 2.0 no longer drops non-numeric columns
# silently and raises TypeError instead.
print(df_single.mean(numeric_only=True))
In [11]:
# Column-wise maximum over every row of the multi-index frame.
multi_overall_max = df_multi.max()
print(multi_overall_max)
In [12]:
# Column-wise maximum over every row of the single-index frame.
# NOTE(review): 'Sex' and 'Embarked' are still regular columns here; confirm
# that max() over them behaves as intended on the installed pandas version
# (e.g. if either column mixes strings with missing values).
single_overall_max = df_single.max()
print(single_overall_max)
In [13]:
# Mean of each column per value of the 'Sex' index level (selected by name).
# The `level=` keyword on DataFrame.mean was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index level is the supported equivalent.
print(df_multi.groupby(level='Sex').mean())
In [14]:
# Mean of each column per value of index level 0 (the outermost level,
# 'Sex'), selected by position.
# The `level=` keyword on DataFrame.mean was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index level is the supported equivalent.
print(df_multi.groupby(level=0).mean())
In [15]:
# Mean of each column per value of index level 1 ('Pclass'), selected by
# position.
# The `level=` keyword on DataFrame.mean was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index level is the supported equivalent.
print(df_multi.groupby(level=1).mean())
In [16]:
# Mean of each column per value of index level 2 ('Embarked'), selected by
# position.
# The `level=` keyword on DataFrame.mean was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index level is the supported equivalent.
print(df_multi.groupby(level=2).mean())
In [17]:
# Mean of each column per ('Sex', 'Pclass') combination of index levels.
# The `level=` keyword on DataFrame.mean was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index levels is the supported equivalent.
print(df_multi.groupby(level=['Sex', 'Pclass']).mean())
In [18]:
# Mean of each column per combination of the first three index levels
# ('Sex', 'Pclass', 'Embarked'), selected by position.
# The `level=` keyword on DataFrame.mean was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the index levels is the supported equivalent.
print(df_multi.groupby(level=[0, 1, 2]).mean())
In [19]:
# Per-'Sex' column means computed from the single-index frame by grouping on
# the 'Sex' column.
# 'Embarked' is still a regular (non-numeric) column of df_single, so limit
# the aggregation to numeric columns: pandas >= 2.0 raises TypeError rather
# than silently dropping columns it cannot average.
print(df_single.groupby(by='Sex').mean(numeric_only=True))
In [20]:
# Column means per ('Sex', 'Pclass', 'Embarked') combination, grouping on
# regular columns of the single-index frame.
mean_group_columns = ['Sex', 'Pclass', 'Embarked']
print(df_single.groupby(by=mean_group_columns).mean())
In [21]:
# Number of rows for each value of the 'Sex' index level.
rows_per_sex = df_multi.groupby(level='Sex').size()
print(rows_per_sex)
In [22]:
# Number of rows for each value of index level 2 ('Embarked'), selected by
# position.
rows_per_level2 = df_multi.groupby(level=2).size()
print(rows_per_level2)
In [23]:
# Number of rows for each combination of the first three index levels
# ('Sex', 'Pclass', 'Embarked'), selected by position.
leading_levels = [0, 1, 2]
print(df_multi.groupby(level=leading_levels).size())
In [24]:
# Number of rows per ('Sex', 'Pclass', 'Embarked') combination, grouping on
# regular columns of the single-index frame.
size_group_columns = ['Sex', 'Pclass', 'Embarked']
print(df_single.groupby(by=size_group_columns).size())