In this notebook, we'll compare two ways of labelling measurement columns in a pandas DataFrame: a hierarchical MultiIndex versus flat, comma-joined strings.
In [25]:
from __future__ import print_function, division

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3: the StringIO module was folded into io

import pandas as pd
from numpy.random import randn
In [2]:
# Column labels as (physical quantity, type) pairs. Voltage has no
# sub-type here, so its second element is the empty string.
columns_tuples = [
    ('power', 'active'),
    ('power', 'apparent'),
    ('energy', 'active'),
    ('energy', 'apparent'),
    ('voltage', ''),
]

# Two-level column index built from those pairs, with both levels named.
index = pd.MultiIndex.from_tuples(
    columns_tuples,
    names=['physical quantity', 'type'],
)

# A 2x5 frame of random values, shown as the cell output.
pd.DataFrame(data=randn(2, 5), columns=index)
Out[2]:
In [3]:
# Flatten each (quantity, type) pair into a single label: "quantity,type",
# or just "quantity" when the type part is empty.
columns_strings = [
    ','.join([str(quantity), str(kind)]) if kind else str(quantity)
    for quantity, kind in columns_tuples
]

# Same shape of random frame as before, now with the flat string labels.
pd.DataFrame(data=randn(2, 5), columns=columns_strings)
Out[3]:
In [4]:
# Space-separated sample data: three rows of six integers each.
# NOTE(review): the rows hold six values while the column label lists hold
# five names; presumably the extra leading value is meant to become the
# row index when parsed -- confirm.
CSV = (
    "0 1 2 3 4 5\n"
    "10 11 12 13 14 15\n"
    "20 21 22 23 24 25\n"
)
In [5]:
# Parse the sample CSV, labelling the columns with the (quantity, type) tuples.
multiindex_df = pd.read_csv(StringIO(CSV), sep=" ", names=columns_tuples)

# Name the two column levels. The renamed index is assigned back instead of
# passing `inplace=True`, which newer pandas releases no longer accept on
# `Index.set_names`; reassignment behaves the same on old and new versions.
multiindex_df.columns = multiindex_df.columns.set_names(['physical quantity', 'type'])

multiindex_df
Out[5]:
In [6]:
# Parse the same CSV text again, this time labelling the columns with the
# flat string names.
string_df = pd.read_csv(StringIO(CSV), sep=" ", names=columns_strings)
string_df
Out[6]:
In [7]:
# Write both frames to a fresh HDF5 file ('w' truncates any existing file).
# The `with` block closes the store even if one of the `put` calls raises,
# so a failed write cannot leave the file open for the cells that follow.
with pd.HDFStore('column_test.h5', 'w') as store:
    store.put('string_df', string_df)
    store.put('multiindex_df', multiindex_df)
In [8]:
# Reopen the file read-only and load the MultiIndex-labelled frame back,
# to check how its column labels come through storage.
store = pd.HDFStore('column_test.h5', 'r')
store.get('multiindex_df')
Out[8]:
In [9]:
# Load the string-labelled frame back from the same open store.
store.get('string_df')
Out[9]:
In [10]:
# Close the store that was opened for reading.
store.close()
In [11]:
def print_csv(df):
    """Print ``df`` serialised as CSV text.

    The frame is written to an in-memory text buffer with ``df.to_csv`` and
    the buffer's contents are then printed to standard output, so nothing is
    written to disk. ``df`` itself is not modified.

    Parameters
    ----------
    df : object with a ``to_csv(buffer)`` method (e.g. a pandas DataFrame)
        The frame to render.

    Returns
    -------
    None
    """
    output_csv = StringIO()
    # Only serialise here. A stray column-renaming call had been fused onto
    # this line, which made the cell a syntax error and referred to an
    # undefined name; a printing helper should not touch the frame's labels.
    df.to_csv(output_csv)
    print(output_csv.getvalue())
# CSV text produced for the MultiIndex-labelled frame.
print_csv(multiindex_df)
In [12]:
# CSV text produced for the string-labelled frame, for comparison.
print_csv(string_df)
In [13]:
# MultiIndex: select one column by its full (physical quantity, type) key.
multiindex_df['power', 'active']
Out[13]:
In [14]:
# String: select the equivalent column by its comma-joined label.
string_df['power,active']
Out[14]:
In [15]:
# MultiIndex: indexing with only the first-level label is an elegant way to
# get both the 'active' and 'apparent' power data at once.
multiindex_df['power']
Out[15]:
In [16]:
# MultiIndex: alternatively, chain two lookups -- first-level label, then
# second-level label.
multiindex_df['power']['active']
Out[16]:
In [17]:
# String: keep every column whose label contains the substring 'power'.
string_df[[col for col in string_df.columns if 'power' in col]]
Out[17]:
In [18]:
# MultiIndex: column labels of the 'power' sub-frame.
multiindex_df['power'].columns
Out[18]:
In [19]:
# String: for each label containing 'power', take the text after the first
# comma.
[col.split(',')[1] for col in string_df.columns if 'power' in col]
Out[19]:
In [20]:
# MultiIndex: cross-section along the columns, selecting 'active' on the
# level named 'type'.
multiindex_df.xs('active', level='type', axis=1)
Out[20]:
In [21]:
# String: keep every column whose label contains the substring 'active'.
string_df[[col for col in string_df.columns if 'active' in col]]
Out[21]:
In [26]:
# Walk the columns of the MultiIndex-labelled frame; each column key unpacks
# into its two label parts. `items()` is used rather than `iteritems()`,
# which newer pandas releases no longer provide. The column data (`series`)
# is not needed for this printout.
for (physical_quantity, ac_type), series in multiindex_df.items():
    print(physical_quantity, ac_type)
In [ ]: