A simple demonstration of pivot_table. Reshaping and Pivot Tables — pandas 0.13.1 documentation



In [1]:

    
import pandas as pd
from pandas import DataFrame, Series



In [2]:

    
df = DataFrame([{
'year':1880,
'name':'John',
'sex': 'M',
'births': 13
},
{'year':1880,
'name':'Pat',
'sex': 'M',
'births': 13
},
{'year':1880,
'name':'Pat',
'sex': 'F',
'births': 13
},
{
'year':1880,
'name':'Jane',
'sex': 'F',
'births': 20
}, 
{
'year':1881,
'name':'John',
'sex': 'M',
'births': 90
},
{
'year':1881,
'name':'Jane',
'sex': 'F',
'births': 21
},])

df









    Out[2]:






  
    
      
      births
      name
      sex
      year
    
  
  
    
      0
       13
       John
       M
       1880
    
    
      1
       13
        Pat
       M
       1880
    
    
      2
       13
        Pat
       F
       1880
    
    
      3
       20
       Jane
       F
       1880
    
    
      4
       90
       John
       M
       1881
    
    
      5
       21
       Jane
       F
       1881
    
  

6 rows × 4 columns



In [3]:

    
pt  = df.pivot_table(rows='year', cols=['name','sex'])['births']
pt









    Out[3]:






  
    
      name
      Jane
      John
      Pat
    
    
      sex
      F
      M
      F
      M
    
    
      year
      
      
      
      
    
  
  
    
      1880
       20
       13
       13
       13
    
    
      1881
       21
       90
      NaN
      NaN
    
  

2 rows × 4 columns



In [4]:

    
# let's make a new table in which there is M/F subindex for all names

names = set(pt.columns.get_level_values(level=0))
sexes = set(pt.columns.get_level_values(level=1))
names, sexes









    Out[4]:





({'Jane', 'John', 'Pat'}, {'F', 'M'})



In [5]:

    
# http://pandas.pydata.org/pandas-docs/stable/indexing.html#creating-a-multiindex-hierarchical-index-object

new_index = pd.MultiIndex.from_product([list(names), list(sexes)],
                           names=['name','sex'])
new_index









    Out[5]:





MultiIndex(levels=[[u'Jane', u'John', u'Pat'], [u'F', u'M']],
           labels=[[0, 0, 1, 1, 2, 2], [1, 0, 1, 0, 1, 0]],
           names=[u'name', u'sex'])



In [6]:

    
# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html

pt.T.reindex(new_index).T









    Out[6]:






  
    
      name
      Jane
      John
      Pat
    
    
      sex
      M
      F
      M
      F
      M
      F
    
    
      year
      
      
      
      
      
      
    
  
  
    
      1880
      NaN
       20
       13
      NaN
       13
       13
    
    
      1881
      NaN
       21
       90
      NaN
      NaN
      NaN
    
  

2 rows × 6 columns



In [7]:

    
pt.T.reindex(new_index).T.fillna(0)









    Out[7]:






  
    
      name
      Jane
      John
      Pat
    
    
      sex
      M
      F
      M
      F
      M
      F
    
    
      year
      
      
      
      
      
      
    
  
  
    
      1880
       0
       20
       13
       0
       13
       13
    
    
      1881
       0
       21
       90
       0
        0
        0
    
  

2 rows × 6 columns



In [19]:

    
df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum'))









    Out[19]:






  
    
      
      
      
      births
      year
    
    
      year
      name
      sex
      
      
    
  
  
    
      1880
      Jane
      F
       20
       1880
    
    
      John
      M
       13
       1880
    
    
      Pat
      F
       13
       1880
    
    
      M
       13
       1880
    
    
      1881
      Jane
      F
       21
       1881
    
    
      John
      M
       90
       1881
    
  

6 rows × 2 columns



In [25]:

    
df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum')).unstack()









    Out[25]:






  
    
      
      
      births
      year
    
    
      
      sex
      F
      M
      F
      M
    
    
      year
      name
      
      
      
      
    
  
  
    
      1880
      Jane
       20
      NaN
       1880
        NaN
    
    
      John
      NaN
       13
        NaN
       1880
    
    
      Pat
       13
       13
       1880
       1880
    
    
      1881
      Jane
       21
      NaN
       1881
        NaN
    
    
      John
      NaN
       90
        NaN
       1881
    
  

5 rows × 4 columns

	births	name	sex	year
0	13	John	M	1880
1	13	Pat	M	1880
2	13	Pat	F	1880
3	20	Jane	F	1880
4	90	John	M	1881
5	21	Jane	F	1881

			births	year
year	name	sex
1880	Jane	F	20	1880
	John	M	13	1880
	Pat	F	13	1880
	Pat	M	13	1880
1881	Jane	F	21	1881
1881	John	M	90	1881