A simple demonstration of pivot_table. Reshaping and Pivot Tables — pandas 0.13.1 documentation


In [1]:
import pandas as pd
from pandas import DataFrame, Series

In [2]:
df = DataFrame([{
'year':1880,
'name':'John',
'sex': 'M',
'births': 13
},
{'year':1880,
'name':'Pat',
'sex': 'M',
'births': 13
},
{'year':1880,
'name':'Pat',
'sex': 'F',
'births': 13
},
{
'year':1880,
'name':'Jane',
'sex': 'F',
'births': 20
}, 
{
'year':1881,
'name':'John',
'sex': 'M',
'births': 90
},
{
'year':1881,
'name':'Jane',
'sex': 'F',
'births': 21
},])

df


Out[2]:
births name sex year
0 13 John M 1880
1 13 Pat M 1880
2 13 Pat F 1880
3 20 Jane F 1880
4 90 John M 1881
5 21 Jane F 1881

6 rows × 4 columns


In [3]:
pt  = df.pivot_table(rows='year', cols=['name','sex'])['births']
pt


Out[3]:
name Jane John Pat
sex F M F M
year
1880 20 13 13 13
1881 21 90 NaN NaN

2 rows × 4 columns


In [4]:
# let's make a new table in which there is M/F subindex for all names

names = set(pt.columns.get_level_values(level=0))
sexes = set(pt.columns.get_level_values(level=1))
names, sexes


Out[4]:
({'Jane', 'John', 'Pat'}, {'F', 'M'})

In [5]:
# http://pandas.pydata.org/pandas-docs/stable/indexing.html#creating-a-multiindex-hierarchical-index-object

new_index = pd.MultiIndex.from_product([list(names), list(sexes)],
                           names=['name','sex'])
new_index


Out[5]:
MultiIndex(levels=[[u'Jane', u'John', u'Pat'], [u'F', u'M']],
           labels=[[0, 0, 1, 1, 2, 2], [1, 0, 1, 0, 1, 0]],
           names=[u'name', u'sex'])

In [6]:
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html

pt.T.reindex(new_index).T


Out[6]:
name Jane John Pat
sex M F M F M F
year
1880 NaN 20 13 NaN 13 13
1881 NaN 21 90 NaN NaN NaN

2 rows × 6 columns


In [7]:
pt.T.reindex(new_index).T.fillna(0)


Out[7]:
name Jane John Pat
sex M F M F M F
year
1880 0 20 13 0 13 13
1881 0 21 90 0 0 0

2 rows × 6 columns


In [19]:
df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum'))


Out[19]:
births year
year name sex
1880 Jane F 20 1880
John M 13 1880
Pat F 13 1880
M 13 1880
1881 Jane F 21 1881
John M 90 1881

6 rows × 2 columns


In [25]:
df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum')).unstack()


Out[25]:
births year
sex F M F M
year name
1880 Jane 20 NaN 1880 NaN
John NaN 13 NaN 1880
Pat 13 13 1880 1880
1881 Jane 21 NaN 1881 NaN
John NaN 90 NaN 1881

5 rows × 4 columns