In [4]:
import numpy as np
import pandas as pd

In [23]:
import pandas.util.testing as tm; tm.N = 3
def unpivot(frame):
    N, K = frame.shape
    data = {'value' : frame.values.ravel('F'),
            'variable' : np.asarray(frame.columns).repeat(N),
            'date' : np.tile(np.asarray(frame.index), K)}
    return pd.DataFrame(data, columns=['date', 'variable', 'value'])
df = unpivot(tm.makeTimeDataFrame())

In [24]:
df


Out[24]:
date variable value
0 2000-01-03 A -0.166550
1 2000-01-04 A -1.540176
2 2000-01-05 A 0.931574
3 2000-01-03 B 1.045622
4 2000-01-04 B 0.808190
5 2000-01-05 B 0.358843
6 2000-01-03 C -0.146395
7 2000-01-04 C 0.389329
8 2000-01-05 C -1.102814
9 2000-01-03 D -0.073086
10 2000-01-04 D 1.538478
11 2000-01-05 D 0.035537

In [29]:
pivot_df=df.pivot(index='date',columns='variable',values='value')
pivot_df


Out[29]:
variable A B C D
date
2000-01-03 -0.166550 1.045622 -0.146395 -0.073086
2000-01-04 -1.540176 0.808190 0.389329 1.538478
2000-01-05 0.931574 0.358843 -1.102814 0.035537

In [30]:
pivot_df=df.pivot(index='date',columns='variable',values='value')
pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
pivot_df.reset_index()


Out[30]:
date A B C D
0 2000-01-03 -0.166550 1.045622 -0.146395 -0.073086
1 2000-01-04 -1.540176 0.808190 0.389329 1.538478
2 2000-01-05 0.931574 0.358843 -1.102814 0.035537

In [31]:
df['value2'] = df['value']*2
df


Out[31]:
date variable value value2
0 2000-01-03 A -0.166550 -0.333100
1 2000-01-04 A -1.540176 -3.080352
2 2000-01-05 A 0.931574 1.863148
3 2000-01-03 B 1.045622 2.091244
4 2000-01-04 B 0.808190 1.616380
5 2000-01-05 B 0.358843 0.717685
6 2000-01-03 C -0.146395 -0.292791
7 2000-01-04 C 0.389329 0.778657
8 2000-01-05 C -1.102814 -2.205628
9 2000-01-03 D -0.073086 -0.146173
10 2000-01-04 D 1.538478 3.076956
11 2000-01-05 D 0.035537 0.071074

In [63]:
pivot_df = df.pivot(index='date',columns='variable')
pivot_df


Out[63]:
value value2
variable A B C D A B C D
date
2000-01-03 -0.166550 1.045622 -0.146395 -0.073086 -0.333100 2.091244 -0.292791 -0.146173
2000-01-04 -1.540176 0.808190 0.389329 1.538478 -3.080352 1.616380 0.778657 3.076956
2000-01-05 0.931574 0.358843 -1.102814 0.035537 1.863148 0.717685 -2.205628 0.071074

In [64]:
pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
pivot_df.reset_index(level=0)


Out[64]:
date value_A value_B value_C value_D value2_A value2_B value2_C value2_D
0 2000-01-03 -0.166550 1.045622 -0.146395 -0.073086 -0.333100 2.091244 -0.292791 -0.146173
1 2000-01-04 -1.540176 0.808190 0.389329 1.538478 -3.080352 1.616380 0.778657 3.076956
2 2000-01-05 0.931574 0.358843 -1.102814 0.035537 1.863148 0.717685 -2.205628 0.071074

stack = make stuff vertically stacked

unstack = spread stuff out horizontally


In [ ]:


In [80]:
columns = pd.MultiIndex.from_tuples([
            ('A', 'cat', 'long'), ('B', 'cat', 'long'),
            ('A', 'dog', 'short'), ('B', 'dog', 'short')
        ],
        names=['exp', 'animal', 'hair_length']
    )
df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
df


Out[80]:
exp A B A B
animal cat cat dog dog
hair_length long long short short
0 0.243153 -1.712484 -0.989127 1.664282
1 0.589307 0.721118 -1.424156 0.135460
2 -0.162633 -0.516633 0.099211 0.254756
3 0.987366 -1.008657 -2.541929 -0.065774

In [81]:
df.columns


Out[81]:
MultiIndex(levels=[['A', 'B'], ['cat', 'dog'], ['long', 'short']],
           labels=[[0, 1, 0, 1], [0, 0, 1, 1], [0, 0, 1, 1]],
           names=['exp', 'animal', 'hair_length'])

In [89]:
stacked = df.stack(level=['exp','animal'])
stacked


Out[89]:
hair_length long short
exp animal
0 A cat 0.243153 NaN
dog NaN -0.989127
B cat -1.712484 NaN
dog NaN 1.664282
1 A cat 0.589307 NaN
dog NaN -1.424156
B cat 0.721118 NaN
dog NaN 0.135460
2 A cat -0.162633 NaN
dog NaN 0.099211
B cat -0.516633 NaN
dog NaN 0.254756
3 A cat 0.987366 NaN
dog NaN -2.541929
B cat -1.008657 NaN
dog NaN -0.065774

In [97]:
stacked.reset_index(level=[0,1],drop=True).stack().to_frame().reset_index()


Out[97]:
animal hair_length 0
0 cat long 0.243153
1 dog short -0.989127
2 cat long -1.712484
3 dog short 1.664282
4 cat long 0.589307
5 dog short -1.424156
6 cat long 0.721118
7 dog short 0.135460
8 cat long -0.162633
9 dog short 0.099211
10 cat long -0.516633
11 dog short 0.254756
12 cat long 0.987366
13 dog short -2.541929
14 cat long -1.008657
15 dog short -0.065774

missing data


In [98]:
columns = pd.MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'),
                                         ('B', 'cat'), ('A', 'dog')],
                                        names=['exp', 'animal'])

In [99]:
index = pd.MultiIndex.from_product([('bar', 'baz', 'foo', 'qux'),
                                        ('one', 'two')],
                                       names=['first', 'second'])

In [102]:
df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns)
df


Out[102]:
exp A B A
animal cat dog cat dog
first second
bar one 0.331016 1.492836 -0.850707 -2.194239
two -0.170267 -0.825463 0.075396 -0.020315
baz one -0.134711 -0.139876 -2.525406 -1.073265
two 1.759569 -0.849516 -0.598635 0.162714
foo one -1.560310 -0.309663 2.585012 -1.697059
two -1.908190 1.027783 0.189191 -0.461268
qux one 0.300106 -1.834859 -0.989717 -1.022173
two 1.618693 0.423368 0.839311 -0.516362

In [110]:
df3 = df.iloc[[0, 1, 4, 7], [1, 2]]
df3


Out[110]:
exp B
animal dog cat
first second
bar one 1.492836 -0.850707
two -0.825463 0.075396
foo one -0.309663 2.585012
qux two 0.423368 0.839311

In [112]:
df


Out[112]:
exp A B A
animal cat dog cat dog
first second
bar one 0.331016 1.492836 -0.850707 -2.194239
two -0.170267 -0.825463 0.075396 -0.020315
baz one -0.134711 -0.139876 -2.525406 -1.073265
two 1.759569 -0.849516 -0.598635 0.162714
foo one -1.560310 -0.309663 2.585012 -1.697059
two -1.908190 1.027783 0.189191 -0.461268
qux one 0.300106 -1.834859 -0.989717 -1.022173
two 1.618693 0.423368 0.839311 -0.516362

groupby + reshaping


In [116]:
df.stack()


Out[116]:
exp A B
first second animal
bar one cat 0.331016 -0.850707
dog -2.194239 1.492836
two cat -0.170267 0.075396
dog -0.020315 -0.825463
baz one cat -0.134711 -2.525406
dog -1.073265 -0.139876
two cat 1.759569 -0.598635
dog 0.162714 -0.849516
foo one cat -1.560310 2.585012
dog -1.697059 -0.309663
two cat -1.908190 0.189191
dog -0.461268 1.027783
qux one cat 0.300106 -0.989717
dog -1.022173 -1.834859
two cat 1.618693 0.839311
dog -0.516362 0.423368

In [118]:
df.stack().mean(1).unstack()


Out[118]:
animal cat dog
first second
bar one -0.259846 -0.350702
two -0.047435 -0.422889
baz one -1.330059 -0.606571
two 0.580467 -0.343401
foo one 0.512351 -1.003361
two -0.859499 0.283257
qux one -0.344806 -1.428516
two 1.229002 -0.046497

The function pivot_table() can be used to create spreadsheet-style pivot tables. See the cookbook for some advanced strategies.

pivot_table = tabela dinâmica!!!


In [ ]: