Data Wrangling: Clean, Transform, Merge, Reshape


In [1]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series

Combining and merging data sets

Database-style DataFrame merges


In [3]:
df1 = DataFrame({'key' : list('bbacaab'),
                 'data1' : np.arange(7)})
df1


Out[3]:
data1 key
0 0 b
1 1 b
2 2 a
3 3 c
4 4 a
5 5 a
6 6 b

In [6]:
df2 = DataFrame({'key' : list('abd'),
                'data2' : np.arange(3)})
df2


Out[6]:
data2 key
0 0 a
1 1 b
2 2 d

In [7]:
pd.merge(df1, df2)


Out[7]:
data1 key data2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0

In [8]:
pd.merge(df1, df2, on ='key')


Out[8]:
data1 key data2
0 0 b 1
1 1 b 1
2 6 b 1
3 2 a 0
4 4 a 0
5 5 a 0

In [10]:
df3 = DataFrame({'lkey' : list('bbacaab'),
                 'data1' : np.arange(7)})
df4 = DataFrame({'rkey' : list('abd'),
                'data2' : np.arange(3)})
pd.merge(df3, df4, left_on = 'lkey', right_on = 'rkey')


Out[10]:
data1 lkey data2 rkey
0 0 b 1 b
1 1 b 1 b
2 6 b 1 b
3 2 a 0 a
4 4 a 0 a
5 5 a 0 a

In [11]:
pd.merge(df1, df2, how = 'outer')


Out[11]:
data1 key data2
0 0.0 b 1.0
1 1.0 b 1.0
2 6.0 b 1.0
3 2.0 a 0.0
4 4.0 a 0.0
5 5.0 a 0.0
6 3.0 c NaN
7 NaN d 2.0

In [12]:
df1 = DataFrame({'key' : list('bbacab'),
                 'data1' : np.arange(6)})
df2 = DataFrame({'key' : list('ababd'),
                 'data2' : np.arange(5)})
pd.merge(df1, df2, on = 'key', how = 'left')


Out[12]:
data1 key data2
0 0 b 1.0
1 0 b 3.0
2 1 b 1.0
3 1 b 3.0
4 2 a 0.0
5 2 a 2.0
6 3 c NaN
7 4 a 0.0
8 4 a 2.0
9 5 b 1.0
10 5 b 3.0

In [13]:
pd.merge(df1, df2, on = 'key', how = 'inner')


Out[13]:
data1 key data2
0 0 b 1
1 0 b 3
2 1 b 1
3 1 b 3
4 5 b 1
5 5 b 3
6 2 a 0
7 2 a 2
8 4 a 0
9 4 a 2

In [14]:
left = DataFrame({'key1': ['foo', 'foo', 'bar'],
                  'key2': ['one', 'two', 'one'],
                  'lval': [1, 2, 3]})
right = DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],
                   'key2': ['one', 'one', 'one', 'two'],
                   'rval': [4, 5, 6, 7]})
pd.merge(left, right, on = ['key1', 'key2'], how = 'outer')


Out[14]:
key1 key2 lval rval
0 foo one 1.0 4.0
1 foo one 1.0 5.0
2 foo two 2.0 NaN
3 bar one 3.0 6.0
4 bar two NaN 7.0

In [15]:
pd.merge(left, right, on = 'key1')


Out[15]:
key1 key2_x lval key2_y rval
0 foo one 1 one 4
1 foo one 1 one 5
2 foo two 2 one 4
3 foo two 2 one 5
4 bar one 3 one 6
5 bar one 3 two 7

In [16]:
pd.merge(left, right, on = 'key1', suffixes=('_left', '_right'))


Out[16]:
key1 key2_left lval key2_right rval
0 foo one 1 one 4
1 foo one 1 one 5
2 foo two 2 one 4
3 foo two 2 one 5
4 bar one 3 one 6
5 bar one 3 two 7

Merging on index


In [17]:
left1 = DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'],
                  'value': range(6)})
right1 = DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])
print(left1)
print(right1)


  key  value
0   a      0
1   b      1
2   a      2
3   a      3
4   b      4
5   c      5
   group_val
a        3.5
b        7.0

In [19]:
pd.merge(left1, right1, left_on = 'key', right_index = True)


Out[19]:
key value group_val
0 a 0 3.5
2 a 2 3.5
3 a 3 3.5
1 b 1 7.0
4 b 4 7.0

In [20]:
pd.merge(left1, right1, left_on = 'key', right_index = True, how = 'outer')


Out[20]:
key value group_val
0 a 0 3.5
2 a 2 3.5
3 a 3 3.5
1 b 1 7.0
4 b 4 7.0
5 c 5 NaN

In [21]:
lefth = DataFrame({'key1': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
                   'key2': [2000, 2001, 2002, 2001, 2002],
                   'data': np.arange(5.)})
righth = DataFrame(np.arange(12).reshape((6, 2)),
                   index=[['Nevada', 'Nevada', 'Ohio', 'Ohio', 'Ohio', 'Ohio'],
                          [2001, 2000, 2000, 2000, 2001, 2002]],
                   columns=['event1', 'event2'])
lefth


Out[21]:
data key1 key2
0 0.0 Ohio 2000
1 1.0 Ohio 2001
2 2.0 Ohio 2002
3 3.0 Nevada 2001
4 4.0 Nevada 2002

In [23]:
righth


Out[23]:
event1 event2
Nevada 2001 0 1
2000 2 3
Ohio 2000 4 5
2000 6 7
2001 8 9
2002 10 11

In [24]:
pd.merge(lefth, righth, left_on = ['key1', 'key2'], right_index = True)


Out[24]:
data key1 key2 event1 event2
0 0.0 Ohio 2000 4 5
0 0.0 Ohio 2000 6 7
1 1.0 Ohio 2001 8 9
2 2.0 Ohio 2002 10 11
3 3.0 Nevada 2001 0 1

In [25]:
pd.merge(lefth, righth, left_on = ['key1', 'key2'], right_index = True, how = 'outer')


Out[25]:
data key1 key2 event1 event2
0 0.0 Ohio 2000 4.0 5.0
0 0.0 Ohio 2000 6.0 7.0
1 1.0 Ohio 2001 8.0 9.0
2 2.0 Ohio 2002 10.0 11.0
3 3.0 Nevada 2001 0.0 1.0
4 4.0 Nevada 2002 NaN NaN
4 NaN Nevada 2000 2.0 3.0

In [26]:
left2 = DataFrame([[1., 2.], [3., 4.], [5., 6.]], index=['a', 'c', 'e'],
                 columns=['Ohio', 'Nevada'])
right2 = DataFrame([[7., 8.], [9., 10.], [11., 12.], [13, 14]],
                   index=['b', 'c', 'd', 'e'], columns=['Missouri', 'Alabama'])
left2


Out[26]:
Ohio Nevada
a 1.0 2.0
c 3.0 4.0
e 5.0 6.0

In [27]:
right2


Out[27]:
Missouri Alabama
b 7.0 8.0
c 9.0 10.0
d 11.0 12.0
e 13.0 14.0

In [28]:
pd.merge(left2, right2, how = 'outer', left_index = True, right_index = True)


Out[28]:
Ohio Nevada Missouri Alabama
a 1.0 2.0 NaN NaN
b NaN NaN 7.0 8.0
c 3.0 4.0 9.0 10.0
d NaN NaN 11.0 12.0
e 5.0 6.0 13.0 14.0

In [29]:
left2.join(right2, how = 'outer')


Out[29]:
Ohio Nevada Missouri Alabama
a 1.0 2.0 NaN NaN
b NaN NaN 7.0 8.0
c 3.0 4.0 9.0 10.0
d NaN NaN 11.0 12.0
e 5.0 6.0 13.0 14.0

In [30]:
left1.join(right1, on = 'key')


Out[30]:
key value group_val
0 a 0 3.5
1 b 1 7.0
2 a 2 3.5
3 a 3 3.5
4 b 4 7.0
5 c 5 NaN

In [31]:
another = DataFrame([[7., 8.], [9., 10.], [11., 12.], [16., 17.]],
                    index=['a', 'c', 'e', 'f'], columns=['New York', 'Oregon'])
left2.join([right2, another])


Out[31]:
Ohio Nevada Missouri Alabama New York Oregon
a 1.0 2.0 NaN NaN 7.0 8.0
c 3.0 4.0 9.0 10.0 9.0 10.0
e 5.0 6.0 13.0 14.0 11.0 12.0

In [32]:
left2.join([right2, another], how = 'outer')


Out[32]:
Ohio Nevada Missouri Alabama New York Oregon
a 1.0 2.0 NaN NaN 7.0 8.0
b NaN NaN 7.0 8.0 NaN NaN
c 3.0 4.0 9.0 10.0 9.0 10.0
d NaN NaN 11.0 12.0 NaN NaN
e 5.0 6.0 13.0 14.0 11.0 12.0
f NaN NaN NaN NaN 16.0 17.0

Concatenating along an axis


In [34]:
arr = np.arange(12).reshape((3, 4))
arr


Out[34]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [35]:
np.concatenate([arr, arr], axis = 1)


Out[35]:
array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [36]:
s1 = Series([0, 1], index=['a', 'b'])
s2 = Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = Series([5, 6], index=['f', 'g'])
pd.concat([s1, s2, s3])


Out[36]:
a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [37]:
pd.concat([s1, s2, s3], axis = 1)


Out[37]:
0 1 2
a 0.0 NaN NaN
b 1.0 NaN NaN
c NaN 2.0 NaN
d NaN 3.0 NaN
e NaN 4.0 NaN
f NaN NaN 5.0
g NaN NaN 6.0

In [38]:
s4 = pd.concat([s1 * 5, s3])
pd.concat([s1, s4], axis = 1)


Out[38]:
0 1
a 0.0 0
b 1.0 5
f NaN 5
g NaN 6

In [40]:
pd.concat([s1, s4], axis = 1, join = 'inner')


Out[40]:
0 1
a 0 0
b 1 5

In [41]:
pd.concat([s1, s4], axis = 1, join_axes = [['a', 'c', 'b', 'e']])


Out[41]:
0 1
a 0.0 0.0
c NaN NaN
b 1.0 5.0
e NaN NaN

In [43]:
result = pd.concat([s1, s1, s3], keys = ['one', 'two', 'three'])
result


Out[43]:
one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [44]:
result.unstack()


Out[44]:
a b f g
one 0.0 1.0 NaN NaN
two 0.0 1.0 NaN NaN
three NaN NaN 5.0 6.0

In [47]:
result = pd.concat([s1, s2, s3], axis = 1, keys=['one', 'two', 'three'])
result


Out[47]:
one two three
a 0.0 NaN NaN
b 1.0 NaN NaN
c NaN 2.0 NaN
d NaN 3.0 NaN
e NaN 4.0 NaN
f NaN NaN 5.0
g NaN NaN 6.0

In [48]:
df1 = DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'],
                columns=['one', 'two'])
df2 = DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'],
                columns=['three', 'four'])
pd.concat([df1, df2], axis=1, keys=['level1', 'level2'])


Out[48]:
level1 level2
one two three four
a 0 1 5.0 6.0
b 2 3 NaN NaN
c 4 5 7.0 8.0

In [49]:
pd.concat({'level1': df1, 'level2': df2}, axis=1)


Out[49]:
level1 level2
one two three four
a 0 1 5.0 6.0
b 2 3 NaN NaN
c 4 5 7.0 8.0

In [50]:
pd.concat([df1, df2], axis=1, keys=['level1', 'level2'],
          names=['upper', 'lower'])


Out[50]:
upper level1 level2
lower one two three four
a 0 1 5.0 6.0
b 2 3 NaN NaN
c 4 5 7.0 8.0

In [51]:
df1 = DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
df1


Out[51]:
a b c d
0 -0.980089 -0.062801 -0.176774 -1.846999
1 -0.146784 -0.769171 -0.748849 0.193336
2 -1.929151 0.553847 -0.384476 0.963701

In [52]:
df2


Out[52]:
b d a
0 0.238163 -0.208719 0.052847
1 -0.839295 -0.522642 0.296476

In [53]:
pd.concat([df1, df2], ignore_index = True)


Out[53]:
a b c d
0 -0.980089 -0.062801 -0.176774 -1.846999
1 -0.146784 -0.769171 -0.748849 0.193336
2 -1.929151 0.553847 -0.384476 0.963701
3 0.052847 0.238163 NaN -0.208719
4 0.296476 -0.839295 NaN -0.522642

Combining data with overlap


In [54]:
a = Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
           index=['f', 'e', 'd', 'c', 'b', 'a'])
b = Series(np.arange(len(a), dtype=np.float64),
           index=['f', 'e', 'd', 'c', 'b', 'a'])
b[-1] = np.nan
a


Out[54]:
f    NaN
e    2.5
d    NaN
c    3.5
b    4.5
a    NaN
dtype: float64

In [55]:
b


Out[55]:
f    0.0
e    1.0
d    2.0
c    3.0
b    4.0
a    NaN
dtype: float64

In [56]:
np.where(pd.isnull(a), b, a)


Out[56]:
array([ 0. ,  2.5,  2. ,  3.5,  4.5,  nan])

In [57]:
b[:-2].combine_first(a[2:])


Out[57]:
a    NaN
b    4.5
c    3.0
d    2.0
e    1.0
f    0.0
dtype: float64

In [58]:
df1 = DataFrame({'a': [1., np.nan, 5., np.nan],
                 'b': [np.nan, 2., np.nan, 6.],
                 'c': range(2, 18, 4)})
df2 = DataFrame({'a': [5., 4., np.nan, 3., 7.],
                 'b': [np.nan, 3., 4., 6., 8.]})
df1.combine_first(df2)


Out[58]:
a b c
0 1.0 NaN 2.0
1 4.0 2.0 6.0
2 5.0 4.0 10.0
3 3.0 6.0 14.0
4 7.0 8.0 NaN

Reshaping and pivoting

Reshaping with hierarchical indexing


In [59]:
data = DataFrame(np.arange(6).reshape((2, 3)),
                 index=pd.Index(['Ohio', 'Colorado'], name='state'),
                 columns=pd.Index(['one', 'two', 'three'], name='number'))
data


Out[59]:
number one two three
state
Ohio 0 1 2
Colorado 3 4 5

In [61]:
result = data.stack()
result


Out[61]:
state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int64

In [62]:
result.unstack()


Out[62]:
number one two three
state
Ohio 0 1 2
Colorado 3 4 5

In [63]:
result.unstack(0)


Out[63]:
state Ohio Colorado
number
one 0 3
two 1 4
three 2 5

In [64]:
result.unstack('state')


Out[64]:
state Ohio Colorado
number
one 0 3
two 1 4
three 2 5

In [65]:
s1 = Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
s2 = Series([4, 5, 6], index=['c', 'd', 'e'])
data2 = pd.concat([s1, s2], keys=['one', 'two'])
data2


Out[65]:
one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [66]:
data2.unstack()


Out[66]:
a b c d e
one 0.0 1.0 2.0 3.0 NaN
two NaN NaN 4.0 5.0 6.0

In [67]:
data2.unstack().stack()


Out[67]:
one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

In [68]:
data2.unstack().stack(dropna = False)


Out[68]:
one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

In [69]:
df = DataFrame({'left': result, 'right': result + 5},
               columns=pd.Index(['left', 'right'], name='side'))
df


Out[69]:
side left right
state number
Ohio one 0 5
two 1 6
three 2 7
Colorado one 3 8
two 4 9
three 5 10

In [70]:
df.unstack('state')


Out[70]:
side left right
state Ohio Colorado Ohio Colorado
number
one 0 3 5 8
two 1 4 6 9
three 2 5 7 10

In [71]:
df.unstack('state').stack('state')


Out[71]:
side left right
number state
one Ohio 0 5
Colorado 3 8
two Ohio 1 6
Colorado 4 9
three Ohio 2 7
Colorado 5 10

Pivoting "long" to "wide" format


In [72]:
data = pd.read_csv('macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))

ldata = data.stack().reset_index().rename(columns={0: 'value'})
wdata = ldata.pivot('date', 'item', 'value')

In [73]:
ldata[:10]


Out[73]:
date item value
0 1959-03-31 realgdp 2710.349
1 1959-03-31 infl 0.000
2 1959-03-31 unemp 5.800
3 1959-06-30 realgdp 2778.801
4 1959-06-30 infl 2.340
5 1959-06-30 unemp 5.100
6 1959-09-30 realgdp 2775.488
7 1959-09-30 infl 2.740
8 1959-09-30 unemp 5.300
9 1959-12-31 realgdp 2785.204

In [74]:
pivoted = ldata.pivot('date', 'item', 'value')
pivoted.head()


Out[74]:
item infl realgdp unemp
date
1959-03-31 0.00 2710.349 5.8
1959-06-30 2.34 2778.801 5.1
1959-09-30 2.74 2775.488 5.3
1959-12-31 0.27 2785.204 5.6
1960-03-31 2.31 2847.699 5.2

In [75]:
ldata['value2'] = np.random.randn(len(ldata))
ldata[:10]


Out[75]:
date item value value2
0 1959-03-31 realgdp 2710.349 -0.926624
1 1959-03-31 infl 0.000 1.463994
2 1959-03-31 unemp 5.800 -1.159041
3 1959-06-30 realgdp 2778.801 -1.431496
4 1959-06-30 infl 2.340 0.374448
5 1959-06-30 unemp 5.100 -1.173601
6 1959-09-30 realgdp 2775.488 -1.225571
7 1959-09-30 infl 2.740 -0.804099
8 1959-09-30 unemp 5.300 1.213737
9 1959-12-31 realgdp 2785.204 2.131589

In [77]:
pivoted = ldata.pivot('date', 'item')
pivoted


Out[77]:
value value2
item infl realgdp unemp infl realgdp unemp
date
1959-03-31 0.00 2710.349 5.8 1.463994 -0.926624 -1.159041
1959-06-30 2.34 2778.801 5.1 0.374448 -1.431496 -1.173601
1959-09-30 2.74 2775.488 5.3 -0.804099 -1.225571 1.213737
1959-12-31 0.27 2785.204 5.6 0.803238 2.131589 -0.849546
1960-03-31 2.31 2847.699 5.2 -0.097270 0.612834 1.000295
1960-06-30 0.14 2834.390 5.2 -1.945855 0.713613 -0.788483
1960-09-30 2.70 2839.022 5.6 -0.439161 -0.754595 0.061581
1960-12-31 1.21 2802.616 6.3 0.734427 -1.215600 -0.482346
1961-03-31 -0.40 2819.264 6.8 0.524046 -0.362159 -1.710668
1961-06-30 1.47 2872.005 7.0 -0.358013 0.413308 -0.372182
1961-09-30 0.80 2918.419 6.8 1.094208 -0.141255 -0.106314
1961-12-31 0.80 2977.830 6.2 0.995990 -0.402363 0.097444
1962-03-31 2.26 3031.241 5.6 -1.832456 0.868089 -0.054803
1962-06-30 0.13 3064.709 5.5 -1.419903 0.556615 2.422958
1962-09-30 2.11 3093.047 5.6 -1.090815 -0.764290 1.098440
1962-12-31 0.79 3100.563 5.5 0.269170 1.022082 -0.089925
1963-03-31 0.53 3141.087 5.8 0.123104 0.075912 -1.294987
1963-06-30 2.75 3180.447 5.7 0.157659 0.240797 2.394933
1963-09-30 0.78 3240.332 5.5 1.339222 -0.481064 -0.142387
1963-12-31 2.46 3264.967 5.6 -0.323708 -0.154542 0.327116
1964-03-31 0.13 3338.246 5.5 1.221692 0.676316 0.531117
1964-06-30 0.90 3376.587 5.2 -1.550854 0.098724 -1.057253
1964-09-30 1.29 3422.469 5.0 -1.282089 -0.758955 -0.555293
1964-12-31 2.05 3431.957 5.0 0.516397 -0.422232 -0.217531
1965-03-31 1.28 3516.251 4.9 -1.096032 1.008040 -1.502582
1965-06-30 2.54 3563.960 4.7 1.010981 0.124660 -0.969003
1965-09-30 0.89 3636.285 4.4 -0.963823 0.179709 0.299257
1965-12-31 2.90 3724.014 4.1 0.081546 0.887947 -0.556138
1966-03-31 4.99 3815.423 3.9 0.311277 1.072105 0.725852
1966-06-30 2.10 3828.124 3.8 -0.400068 0.505281 0.050788
... ... ... ... ... ... ...
2002-06-30 1.56 11538.770 5.8 0.291589 -0.271054 1.584669
2002-09-30 2.66 11596.430 5.7 0.471654 -0.181241 0.463452
2002-12-31 3.08 11598.824 5.8 1.361310 1.367097 0.100543
2003-03-31 1.31 11645.819 5.9 -0.243821 0.132350 -1.656875
2003-06-30 1.09 11738.706 6.2 -1.923852 -0.661571 1.492037
2003-09-30 2.60 11935.461 6.1 3.019981 0.401278 -0.403171
2003-12-31 3.02 12042.817 5.8 -0.687308 -1.100962 -0.887723
2004-03-31 2.35 12127.623 5.7 -1.106470 -0.019743 -0.286757
2004-06-30 3.61 12213.818 5.6 -0.333375 -0.542706 0.360817
2004-09-30 3.58 12303.533 5.4 -0.500924 0.163372 1.091192
2004-12-31 2.09 12410.282 5.4 -0.937768 -0.270243 -1.966059
2005-03-31 4.15 12534.113 5.3 0.290491 -1.628012 1.929309
2005-06-30 1.85 12587.535 5.1 -0.425911 -0.649207 1.052995
2005-09-30 9.14 12683.153 5.0 -1.084796 0.135103 -0.559240
2005-12-31 0.40 12748.699 4.9 0.319550 0.331795 0.297689
2006-03-31 2.60 12915.938 4.7 0.469177 0.311309 1.972427
2006-06-30 3.97 12962.462 4.7 -1.798727 3.219135 2.022561
2006-09-30 -1.58 12965.916 4.7 0.627989 0.182778 -0.718442
2006-12-31 3.30 13060.679 4.4 -1.001967 1.427433 0.497455
2007-03-31 4.58 13099.901 4.5 0.234602 -0.752268 -1.191678
2007-06-30 2.75 13203.977 4.5 -0.263329 1.072004 -0.254462
2007-09-30 3.45 13321.109 4.7 -0.344448 -0.179330 0.199381
2007-12-31 6.38 13391.249 4.8 0.583915 -0.754433 0.937132
2008-03-31 2.82 13366.865 4.9 -0.636130 -0.115850 0.043380
2008-06-30 8.53 13415.266 5.4 -1.193931 1.439950 0.902051
2008-09-30 -3.16 13324.600 6.0 0.517141 -0.034889 -1.005125
2008-12-31 -8.79 13141.920 6.9 -1.112574 0.270335 0.310107
2009-03-31 0.94 12925.410 8.1 0.824373 -1.134106 -0.486228
2009-06-30 3.37 12901.504 9.2 -1.206009 -0.082265 0.707111
2009-09-30 3.56 12990.341 9.6 0.334478 -0.200418 0.724837

203 rows × 6 columns


In [78]:
pivoted['value'][:5]


Out[78]:
item infl realgdp unemp
date
1959-03-31 0.00 2710.349 5.8
1959-06-30 2.34 2778.801 5.1
1959-09-30 2.74 2775.488 5.3
1959-12-31 0.27 2785.204 5.6
1960-03-31 2.31 2847.699 5.2

In [81]:
unstacked = ldata.set_index(['date', 'item']).unstack('item')
unstacked[:7]


Out[81]:
value value2
item infl realgdp unemp infl realgdp unemp
date
1959-03-31 0.00 2710.349 5.8 1.463994 -0.926624 -1.159041
1959-06-30 2.34 2778.801 5.1 0.374448 -1.431496 -1.173601
1959-09-30 2.74 2775.488 5.3 -0.804099 -1.225571 1.213737
1959-12-31 0.27 2785.204 5.6 0.803238 2.131589 -0.849546
1960-03-31 2.31 2847.699 5.2 -0.097270 0.612834 1.000295
1960-06-30 0.14 2834.390 5.2 -1.945855 0.713613 -0.788483
1960-09-30 2.70 2839.022 5.6 -0.439161 -0.754595 0.061581

In [ ]: