Title: Hierarchical Data In Pandas
Slug: pandas_hierarchical_data
Summary: Hierarchical Data In Pandas
Date: 2016-05-01 12:00
Category: Python
Tags: Data Wrangling
Authors: Chris Albon
In [1]:
# import modules
import pandas as pd
In [2]:
# Build the example roster: three regiments, each with two companies of two
# soldiers, plus each soldier's pre- and post-test scores.
raw_data = dict(
    regiment=['Nighthawks'] * 4 + ['Dragoons'] * 4 + ['Scouts'] * 4,
    company=['1st', '1st', '2nd', '2nd'] * 3,
    name=[
        'Miller', 'Jacobson', 'Ali', 'Milner',
        'Cooze', 'Jacon', 'Ryaner', 'Sone',
        'Sloan', 'Piger', 'Riani', 'Ali',
    ],
    preTestScore=[4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],
    postTestScore=[25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70],
)

# Pass the column order explicitly so the layout does not depend on dict ordering.
df = pd.DataFrame(
    raw_data,
    columns=['regiment', 'company', 'name', 'preTestScore', 'postTestScore'],
)
df
Out[2]:
In [3]:
# Preview a hierarchical (regiment, company) index while keeping both key
# columns in the frame (drop=False). set_index returns a NEW DataFrame rather
# than modifying df, so the call itself must be the cell's last expression for
# the indexed frame to be displayed. df is deliberately left unmodified here.
df.set_index(['regiment', 'company'], drop=False)
Out[3]:
In [4]:
# Move 'regiment' (outer level) and 'company' (inner level) out of the columns
# and into a two-level row index, rebinding df to the re-indexed frame.
df = df.set_index(keys=['regiment', 'company'])
df
Out[4]:
In [5]:
# View the index object of df (the row labels produced by the set_index call above)
df.index
Out[5]:
In [6]:
# Display a copy of df with the two index levels exchanged, so 'company'
# becomes the outer level and 'regiment' the inner one; df itself is not changed.
df.swaplevel(i='regiment', j='company')
Out[6]:
In [7]:
# Total the numeric score columns per regiment.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# grouping on the index level is the supported equivalent.
#   sort=False        -> keep regiments in order of first appearance
#   numeric_only=True -> sum only the score columns, not the string 'name' column
df.groupby(level='regiment', sort=False).sum(numeric_only=True)
Out[7]: