In [1]:
import numpy as np
import pandas as pd
In [2]:
# Load the Titanic training CSV, using its first column as the row index,
# then discard the columns this walkthrough does not use.
unused_columns = ['Name', 'Ticket', 'SibSp', 'Parch']
df = pd.read_csv('data/src/titanic_train.csv', index_col=0)
df = df.drop(columns=unused_columns)
In [3]:
# Show the leading rows of the loaded frame as a quick sanity check.
leading_rows = df.head()
print(leading_rows)
In [4]:
# Mean of every numeric column, by passenger class (rows) and sex (columns).
# `values` is restricted to numeric columns explicitly: the default 'mean'
# aggregation cannot average text columns, and pandas >= 2.0 raises a
# TypeError for them instead of silently dropping them as older releases did.
# The grouping keys themselves are excluded from the aggregated values.
numeric_values = list(
    df.select_dtypes(include='number').columns.difference(['Pclass', 'Sex'])
)
print(pd.pivot_table(df, index='Pclass', columns='Sex', values=numeric_values))
In [5]:
# Show the type of object pivot_table returns.
# `values` is limited to numeric columns so the default 'mean' aggregation
# does not hit text columns, which raises a TypeError on pandas >= 2.0
# (older releases dropped such columns silently).
numeric_values = list(
    df.select_dtypes(include='number').columns.difference(['Pclass', 'Sex'])
)
pivoted = pd.pivot_table(df, index='Pclass', columns='Sex', values=numeric_values)
print(type(pivoted))
In [6]:
# Mean Age (the default aggregation) for each Pclass row and Sex column.
age_by_class_and_sex = df.pivot_table(index='Pclass', columns='Sex', values='Age')
print(age_by_class_and_sex)
In [7]:
# Mean Age and Fare for each (Sex, Pclass) row group, split by Survived.
row_keys = ['Sex', 'Pclass']
measures = ['Age', 'Fare']
print(df.pivot_table(index=row_keys, columns='Survived', values=measures))
In [8]:
# Mean Age by Sex and Pclass; margins=True appends the row/column totals.
age_with_margins = df.pivot_table(
    index='Sex',
    columns='Pclass',
    values='Age',
    margins=True,
)
print(age_with_margins)
In [9]:
# Same table as the plain margins example, but the margin row/column is
# labelled 'Total' via margins_name.
margin_options = {'margins': True, 'margins_name': 'Total'}
print(df.pivot_table(index='Sex', columns='Pclass', values='Age', **margin_options))
In [10]:
# Minimum Age for each Sex/Pclass cell, including the margins.
# The string alias 'min' replaces np.min: pandas >= 2.1 emits a FutureWarning
# when a NumPy reducer is passed as aggfunc, because it is currently swapped
# for the equivalent pandas method and will later be called as-is.
print(pd.pivot_table(df, index='Sex', columns='Pclass', values='Age',
                     margins=True, aggfunc='min'))
In [11]:
# Minimum and maximum Age side by side for each Sex/Pclass cell, with margins.
# String aliases replace np.min / np.max: passing NumPy reducers as aggfunc
# triggers a FutureWarning on pandas >= 2.1, and with strings the top-level
# column labels are simply 'min' and 'max' whatever NumPy version is installed.
print(pd.pivot_table(df, index='Sex', columns='Pclass', values='Age',
                     margins=True, aggfunc=['min', 'max']))
In [12]:
# Apply the builtin len as the aggregation for Age in each Sex/Pclass cell,
# with margins included.
age_counts = df.pivot_table(
    index='Sex',
    columns='Pclass',
    values='Age',
    margins=True,
    aggfunc=len,
)
print(age_counts)
In [13]:
# Total number of rows in the frame, for comparison with the pivot counts.
total_rows = df.shape[0]
print(total_rows)
In [14]:
# Count of missing values in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)
In [15]:
# Same len-based table as before, but with dropna=False so pivot_table's
# default handling of missing entries is switched off.
count_options = dict(margins=True, aggfunc=len, dropna=False)
print(df.pivot_table(index='Sex', columns='Pclass', values='Age', **count_options))