In this notebook, we'll compare MultiIndex and String representations of measurements.


In [25]:
from __future__ import print_function, division
import pandas as pd
from numpy.random import randn
from StringIO import StringIO

Creating and reading in data

Create DataFrames from random data

MultiIndex


In [2]:
columns_tuples = [('power', 'active'),
                  ('power', 'apparent'),
                  ('energy', 'active'),
                  ('energy', 'apparent'),
                  ('voltage', '')]

index = pd.MultiIndex.from_tuples(columns_tuples, names=['physical quantity', 'type']) 

pd.DataFrame(randn(2,5), columns=index)


Out[2]:
physical quantity power energy voltage
type active apparent active apparent
0 0.698756 1.366798 2.320818 -1.592383 0.155624
1 -0.817807 2.438316 1.541125 -0.393314 0.693330

String


In [3]:
columns_strings = [str(p) + (',' + str(t) if t else '') for p,t in columns_tuples]

pd.DataFrame(randn(2,5), columns=columns_strings)


Out[3]:
power,active power,apparent energy,active energy,apparent voltage
0 -0.979919 -0.431717 -0.073521 0.731000 0.109189
1 0.494156 0.410478 0.820294 -0.812069 0.256516

Read from CSV


In [4]:
CSV = """0 1 2 3 4 5
10 11 12 13 14 15
20 21 22 23 24 25
"""

MultiIndex


In [5]:
multiindex_df = pd.read_csv(StringIO(CSV), sep=" ", names=columns_tuples)
multiindex_df.columns.set_names(['physical quantity', 'type'], inplace=True)
multiindex_df


Out[5]:
physical quantity power energy voltage
type active apparent active apparent
0 1 2 3 4 5
10 11 12 13 14 15
20 21 22 23 24 25

String


In [6]:
string_df = pd.read_csv(StringIO(CSV), sep=" ", names=columns_strings)
string_df


Out[6]:
power,active power,apparent energy,active energy,apparent voltage
0 1 2 3 4 5
10 11 12 13 14 15
20 21 22 23 24 25

Save to HDF5


In [7]:
store = pd.HDFStore('column_test.h5', 'w')
store.put('string_df', string_df)
store.put('multiindex_df', multiindex_df)
store.close()

Read from HDF5


In [8]:
store = pd.HDFStore('column_test.h5', 'r')
store.get('multiindex_df')


Out[8]:
physical quantity power energy voltage
type active apparent active apparent
0 1 2 3 4 5
10 11 12 13 14 15
20 21 22 23 24 25

In [9]:
store.get('string_df')


Out[9]:
power,active power,apparent energy,active energy,apparent voltage
0 1 2 3 4 5
10 11 12 13 14 15
20 21 22 23 24 25

In [10]:
store.close()

Dump to CSV


In [11]:
def print_csv(df):
    output_csv = StringIO()
    df.to_csv(output_csv)df.columns.set_names(LEVEL_NAMES, inplace=True)
    print(output_csv.getvalue())
    
print_csv(multiindex_df)


physical quantity,power,power,energy,energy,voltage
type,active,apparent,active,apparent,
,,,,,
0,1,2,3,4,5
10,11,12,13,14,15
20,21,22,23,24,25


In [12]:
print_csv(string_df)


,"power,active","power,apparent","energy,active","energy,apparent",voltage
0,1,2,3,4,5
10,11,12,13,14,15
20,21,22,23,24,25

Select data

Select single column


In [13]:
# MultiIndex
multiindex_df['power', 'active']


Out[13]:
0      1
10    11
20    21
Name: (power, active), dtype: int64

In [14]:
# String
string_df['power,active']


Out[14]:
0      1
10    11
20    21
Name: power,active, dtype: int64

select all power columns


In [15]:
multiindex_df['power'] # elegant way to get both 'active' and 'apparent' power data


Out[15]:
type active apparent
0 1 2
10 11 12
20 21 22

In [16]:
# or like this:
multiindex_df['power']['active']


Out[16]:
0      1
10    11
20    21
Name: active, dtype: int64

In [17]:
# String:
string_df[[col for col in string_df.columns if 'power' in col]]


Out[17]:
power,active power,apparent
0 1 2
10 11 12
20 21 22

list of all available 'power' AC types:


In [18]:
multiindex_df['power'].columns


Out[18]:
Index([u'active', u'apparent'], dtype='object')

In [19]:
[col.split(',')[1] for col in string_df.columns if 'power' in col]


Out[19]:
['active', 'apparent']

get all 'active' physical quantities


In [20]:
multiindex_df.xs('active', level='type', axis=1)


Out[20]:
physical quantity power energy
0 1 3
10 11 13
20 21 23

In [21]:
# String:
string_df[[col for col in string_df.columns if 'active' in col]]


Out[21]:
power,active energy,active
0 1 3
10 11 13
20 21 23

Iterate


In [26]:
for (physical_quantity, ac_type), series in multiindex_df.iteritems():
    print(physical_quantity, ac_type)


power active
power apparent
energy active
energy apparent
voltage 

In [ ]: