In [7]:
from pandas import DataFrame, Series

In [8]:
import pandas as pd

In [9]:
import numpy as np

In [10]:
# Ten random draws labelled by a two-level (hierarchical) index: the first
# list supplies the outer letter keys, the second the inner integer keys.
# The values come from np.random.randn, so they differ from run to run
# unless a seed was fixed in an earlier cell.
data = Series(
    np.random.randn(10),
    index=[
        ['a'] * 3 + ['b'] * 3 + ['c'] * 2 + ['d'] * 2,
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)

In [11]:
# Display the Series; the repr prints each outer label once per group with
# the inner labels beside the values.
data


Out[11]:
a  1   -0.430863
   2   -0.872798
   3    0.961456
b  1    0.475739
   2    0.590467
   3   -0.537604
c  1   -0.317320
   2   -0.086924
d  2    1.006671
   3    1.045709
dtype: float64

In [11]:
# Inspect the index object itself: passing two parallel lists as `index`
# produced a MultiIndex.
data.index


Out[11]:
MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])

In [12]:
# Partial indexing: selecting an outer-level label returns the values under
# it, indexed by the inner level only.
data['b']


Out[12]:
1    0.475739
2    0.590467
3   -0.537604
dtype: float64

In [13]:
# Label slice on the outer level; both end labels ('b' and 'c') are included.
data['b':'c']


Out[13]:
b  1    0.475739
   2    0.590467
   3   -0.537604
c  1   -0.317320
   2   -0.086924
dtype: float64

In [14]:
# Select several outer-level labels at once. `.loc` is the label-based
# indexer; the older `.ix` indexer has been removed from pandas.
data.loc[['b', 'd']]


Out[14]:
b  1    0.475739
   2    0.590467
   3   -0.537604
d  2    1.006671
   3    1.045709
dtype: float64

In [15]:
# Select on the inner level: every outer key, inner key 2. The result is
# indexed by the outer level only.
data[:, 2]


Out[15]:
a   -0.872798
b    0.590467
c   -0.086924
d    1.006671
dtype: float64

In [16]:
# Pivot the inner index level into columns. Combinations that do not exist
# in the Series (c/3 and d/1) appear as NaN in the resulting frame.
data.unstack()


Out[16]:
1 2 3
a -0.430863 -0.872798 0.961456
b 0.475739 0.590467 -0.537604
c -0.317320 -0.086924 NaN
d NaN 1.006671 1.045709

In [17]:
# stack() folds the columns back into an inner index level, undoing the
# unstack; in the output shown the NaN cells are gone and the original ten
# rows are recovered.
data.unstack().stack()


Out[17]:
a  1   -0.430863
   2   -0.872798
   3    0.961456
b  1    0.475739
   2    0.590467
   3   -0.537604
c  1   -0.317320
   2   -0.086924
d  2    1.006671
   3    1.045709
dtype: float64

In [18]:
# A 4x3 frame of the integers 0..11 with a two-level index on BOTH axes:
# rows are keyed by (letter, number), columns by (state, colour).
# The third state label is 'Colorado' (no trailing comma inside the string),
# matching the displayed output and the MultiIndex built later on.
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'],
             ['Green', 'Red', 'Green']],
)

In [19]:
# Display the frame: both the rows and the columns carry a two-level index.
frame


Out[19]:
Ohio Colorado
Green Red Green
a 1 0 1 2
2 3 4 5
b 1 6 7 8
2 9 10 11

In [22]:
# Name the two row-index levels (this modifies `frame` in place) so later
# cells can refer to them as 'key1' / 'key2' instead of by position.
frame.index.names = ['key1', 'key2']

In [23]:
# Likewise name the two column-index levels (also modifies `frame` in place).
frame.columns.names = ['state', 'color']

In [24]:
# Display again: the level names now appear on both axes.
frame


Out[24]:
state Ohio Colorado
color Green Red Green
key1 key2
a 1 0 1 2
2 3 4 5
b 1 6 7 8
2 9 10 11

In [25]:
# Partial column indexing: selecting the outer label 'Ohio' returns the
# sub-frame of its inner ('color') columns.
frame['Ohio']


Out[25]:
color Green Red
key1 key2
a 1 0 1
2 3 4
b 1 6 7
2 9 10

In [27]:
from pandas import MultiIndex

In [28]:
# Build a standalone MultiIndex from one array per level; `names` labels the
# levels. The result is only displayed here, not stored.
MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], 
                        ['Green', 'Red', 'Green']], 
                       names=['state', 'color'])


Out[28]:
MultiIndex(levels=[['Colorado', 'Ohio'], ['Green', 'Red']],
           labels=[[1, 1, 0], [0, 1, 0]],
           names=['state', 'color'])

In [29]:
# Interchange the two row-index levels (referenced by name). The result is a
# new object: the row order and data are unchanged, only the level order is.
frame.swaplevel('key1', 'key2')


Out[29]:
state Ohio Colorado
color Green Red Green
key2 key1
1 a 0 1 2
2 a 3 4 5
1 b 6 7 8
2 b 9 10 11

In [30]:
# Sort the rows by the values of the second index level ('key2').
# sort_index(level=...) is the supported replacement for the removed
# DataFrame.sortlevel().
frame.sort_index(level=1)


Out[30]:
state Ohio Colorado
color Green Red Green
key1 key2
a 1 0 1 2
b 1 6 7 8
a 2 3 4 5
b 2 9 10 11

In [31]:
# Swap the two row levels, then sort on the new outermost level so the
# index is in lexicographic order. sort_index(level=...) replaces the
# removed DataFrame.sortlevel().
frame.swaplevel(0, 1).sort_index(level=0)


Out[31]:
state Ohio Colorado
color Green Red Green
key2 key1
1 a 0 1 2
b 6 7 8
2 a 3 4 5
b 9 10 11

In [32]:
# Sum the rows that share the same 'key2' value. The `level=` argument of
# DataFrame.sum() has been removed from pandas; grouping on the index level
# and summing gives the same result.
frame.groupby(level='key2').sum()


Out[32]:
state Ohio Colorado
color Green Red Green
key2
1 6 8 10
2 12 14 16

In [33]:
# Sum the columns that share the same 'color' value. The `level=` argument of
# DataFrame.sum() has been removed from pandas, and groupby(axis=1) is
# deprecated, so transpose, group on the (now row) level, sum, and transpose
# back to keep the original row index.
frame.T.groupby(level='color').sum().T


Out[33]:
color Green Red
key1 key2
a 1 2 1
2 8 4
b 1 14 7
2 20 10

In [34]:
# Rebind `frame` to a flat, seven-row frame (replacing the hierarchical one
# used above). Columns 'c' and 'd' are used as index keys in later cells.
frame = DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one'] * 3 + ['two'] * 4,
    'd': [0, 1, 2, 0, 1, 2, 3],
})

In [35]:
# Display the flat frame before any columns are moved into the index.
frame


Out[35]:
a b c d
0 0 7 one 0
1 1 6 one 1
2 2 5 one 2
3 3 4 two 0
4 4 3 two 1
5 5 2 two 2
6 6 1 two 3

In [38]:
# Move columns 'c' and 'd' into a two-level row index. The result is a new
# frame bound to `frame2`; `frame` keeps its columns and is reused below.
frame2 = frame.set_index(['c', 'd'])

In [39]:
# Display the result: 'c' and 'd' now form the row index and only 'a' and
# 'b' remain as columns.
frame2


Out[39]:
a b
c d
one 0 0 7
1 1 6
2 2 5
two 0 3 4
1 4 3
2 5 2
3 6 1

In [40]:
# drop=False keeps 'c' and 'd' as ordinary columns in addition to using them
# as the row index.
frame.set_index(['c', 'd'], drop=False)


Out[40]:
a b c d
c d
one 0 0 7 one 0
1 1 6 one 1
2 2 5 one 2
two 0 3 4 two 0
1 4 3 two 1
2 5 2 two 2
3 6 1 two 3

In [41]:
# reset_index() does the opposite of set_index(): the index levels become
# columns again and a default integer index takes their place.
frame2.reset_index()


Out[41]:
c d a b
0 one 0 0 7
1 one 1 1 6
2 one 2 2 5
3 two 0 3 4
4 two 1 4 3
5 two 2 5 2
6 two 3 6 1

In [42]:
# Float Series 0.0, 1.0, 2.0 with the default integer index.
ser = Series(np.arange(3, dtype=float))

In [44]:
# Display the Series; it carries the default integer index 0..2.
ser


Out[44]:
0    0.0
1    1.0
2    2.0
dtype: float64

In [45]:
# Same float values as `ser`, but labelled with strings instead of integers.
ser2 = Series(
    np.arange(3, dtype=float),
    index=['a', 'b', 'c'],
)

In [46]:
# Last element by position. With a non-integer index, plain `ser2[-1]` only
# works through a positional fallback that pandas has deprecated; `.iloc`
# states the positional intent explicitly.
ser2.iloc[-1]


Out[46]:
2.0

In [47]:
# Label-based slice on the integer index: the end label 1 is included, so
# two rows come back. `.loc` replaces the removed `.ix` indexer.
ser.loc[:1]


Out[47]:
0    0.0
1    1.0
dtype: float64

In [48]:
# Integer values 0, 1, 2 under integer labels that do NOT match their
# positions, to contrast label-based and position-based lookup.
ser3 = Series(
    [0, 1, 2],
    index=[-5, 1, 3],
)

In [52]:
# Position-based lookup: the third element, regardless of the integer
# labels (-5, 1, 3) on the index.
ser3.iloc[2]


Out[52]:
2

In [53]:
# Rebind `frame` once more: a 3x2 frame of 0..5 whose integer row labels
# (2, 0, 1) are deliberately out of positional order.
frame = DataFrame(
    np.arange(6).reshape((3, 2)),
    index=[2, 0, 1],
)

In [55]:
# First row by position; its row label is 2, which shows up as the `Name`
# of the returned Series.
frame.iloc[0]


Out[55]:
0    0
1    1
Name: 2, dtype: int64