In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the frame stored under the key 'df' in the HDF5 file, then display it.
df = pd.read_hdf('foo.h5', key='df')
df


Out[2]:
A B C D
0 2013-01-01 bar -1.143918 0
1 2013-01-02 bar 0.633685 2
2 2013-01-03 bar 0.304527 0
3 2013-01-04 foo -0.477373 3
4 2013-01-05 foo -0.327473 0
5 2013-01-06 foo 0.008155 3
6 2013-01-07 bar -0.377228 3
7 2013-01-08 foo 0.196635 3
8 2013-01-09 baz -0.783700 0
9 2013-01-10 baz -0.650373 3
10 2013-01-11 foo 0.555578 4
11 2013-01-12 foo 1.222324 2

In [21]:
# Row-over-row difference of column A: each value minus the value one row
# earlier (shift moves the column down by one position).
x = df['A'] - df['A'].shift(periods=1)

# Look at the second element by position.
x.iloc[1]


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-21-eeaf4bd42a61> in <module>()
      1 x = df['A']-df['A'].shift(1)
----> 2 x.iloc[1].__mro__

AttributeError: 'Timedelta' object has no attribute '__mro__'

In [4]:
# Total of column D within each distinct value of column B.
(
    df
    .groupby('B')['D']
    .sum()
)


Out[4]:
B
bar     5
baz     3
foo    15
Name: D, dtype: int64

In [6]:
# Expand column B into one 0/1 indicator column per distinct value.
pd.get_dummies(data=df['B'])


Out[6]:
bar baz foo
0 1 0 0
1 1 0 0
2 1 0 0
3 0 0 1
4 0 0 1
5 0 0 1
6 1 0 0
7 0 0 1
8 0 1 0
9 0 1 0
10 0 0 1
11 0 0 1

In [7]:
# Attach the EST time zone to the timestamps in column A.
df['A'].dt.tz_localize(tz='EST')


Out[7]:
0     2013-01-01 00:00:00-05:00
1     2013-01-02 00:00:00-05:00
2     2013-01-03 00:00:00-05:00
3     2013-01-04 00:00:00-05:00
4     2013-01-05 00:00:00-05:00
5     2013-01-06 00:00:00-05:00
6     2013-01-07 00:00:00-05:00
7     2013-01-08 00:00:00-05:00
8     2013-01-09 00:00:00-05:00
9     2013-01-10 00:00:00-05:00
10    2013-01-11 00:00:00-05:00
11    2013-01-12 00:00:00-05:00
dtype: object

In [8]:
# Attach the EST time zone to column A, then express the same instants in CET.
(
    df['A']
    .dt.tz_localize('EST')
    .dt.tz_convert('CET')
)


Out[8]:
0     2013-01-01 06:00:00+01:00
1     2013-01-02 06:00:00+01:00
2     2013-01-03 06:00:00+01:00
3     2013-01-04 06:00:00+01:00
4     2013-01-05 06:00:00+01:00
5     2013-01-06 06:00:00+01:00
6     2013-01-07 06:00:00+01:00
7     2013-01-08 06:00:00+01:00
8     2013-01-09 06:00:00+01:00
9     2013-01-10 06:00:00+01:00
10    2013-01-11 06:00:00+01:00
11    2013-01-12 06:00:00+01:00
dtype: object

In [14]:
# Append the constant suffix ' foobar' to every value of column B.
# B is cast to object first so that `+` performs string concatenation
# (presumably B is stored with a non-object dtype such as category -- confirm).
s = df['B'].astype('object') + ' ' + 'foobar'
s


Out[14]:
0     bar foobar
1     bar foobar
2     bar foobar
3     foo foobar
4     foo foobar
5     foo foobar
6     bar foobar
7     foo foobar
8     baz foobar
9     baz foobar
10    foo foobar
11    foo foobar
Name: B, dtype: object

In [17]:
# Sum of column D over a sliding window of 3 rows; positions that do not yet
# have a full window come back as NaN.
# NOTE(review): pd.rolling_sum is the legacy top-level API; newer pandas
# spells this df['D'].rolling(3).sum() -- confirm the target pandas version
# before switching.
pd.rolling_sum(df['D'], window=3)


Out[17]:
0    NaN
1    NaN
2      2
3      5
4      3
5      6
6      6
7      9
8      6
9      6
10     7
11     9
dtype: float64

In [18]:
# Grand total of column D.
df.D.sum()


Out[18]:
23

In [25]:
# Filter rows with a query expression: keep rows whose B is in the list ["bar"].
df.query(expr='B==["bar"]')


Out[25]:
A B C D
0 2013-01-01 bar -1.143918 0
1 2013-01-02 bar 0.633685 2
2 2013-01-03 bar 0.304527 0
3 2013-01-04 bar -0.477373 3
4 2013-01-05 bar -0.327473 0
5 2013-01-06 bar 0.008155 3
6 2013-01-07 bar -0.377228 3
7 2013-01-08 bar 0.196635 3
8 2013-01-09 bar -0.783700 0
9 2013-01-10 bar -0.650373 3
10 2013-01-11 bar 0.555578 4
11 2013-01-12 bar 1.222324 2

In [26]:
# Filter rows with a boolean mask passed to .loc: keep rows where B equals 'bar'.
df.loc[df['B'] == 'bar']


Out[26]:
A B C D
0 2013-01-01 bar -1.143918 0
1 2013-01-02 bar 0.633685 2
2 2013-01-03 bar 0.304527 0
3 2013-01-04 bar -0.477373 3
4 2013-01-05 bar -0.327473 0
5 2013-01-06 bar 0.008155 3
6 2013-01-07 bar -0.377228 3
7 2013-01-08 bar 0.196635 3
8 2013-01-09 bar -0.783700 0
9 2013-01-10 bar -0.650373 3
10 2013-01-11 bar 0.555578 4
11 2013-01-12 bar 1.222324 2

In [ ]:
# Filter rows by indexing the frame directly with a boolean mask (B equals 'bar').
df[df.B == 'bar']

In [28]:
# Start from an empty frame that only has column labels A, B, C.
# Use the `pd.` prefix: the notebook imports pandas only as `pd`, so a bare
# `DataFrame` name is undefined on a fresh kernel.
# NOTE: this rebinds `df`, replacing the frame loaded from foo.h5.
df = pd.DataFrame(columns=list('ABC'))

# Assigning a scalar to a new row label adds that row, with the scalar
# filled into every column.
df.loc[1] = 2
df.loc[2] = 5
df


Out[28]:
A B C
1 2 2 2
2 5 5 5

In [ ]:
# Sketch 1: grow the frame inside the loop, rebinding `df` to the result of
# append on every iteration.
# NOTE(review): `row` is not defined anywhere in this notebook and `....` is a
# placeholder, so this cell is illustrative pseudo-code and does not run as written.
for x in row:
    df = df.append(....)
    
# Sketch 2: collect the pieces in a plain list during the loop and combine
# them with a single pd.concat call afterwards.
l = []
for x in row:
    l.append(x)
    
pd.concat(l)

In [31]:
# Group the frame by interval bins: draw 10 standard-normal values, cut their
# range into 3 bins, and sum the frame's rows per bin.
# Use the `pd.` prefix: the notebook imports pandas only as `pd`, so a bare
# `Series` name is undefined on a fresh kernel.
# The draws are unseeded, so the bin edges (and which bin each row lands in)
# differ from run to run.
df.groupby(pd.cut(pd.Series(np.random.randn(10)),bins=3)).sum()


Out[31]:
A B C
(-2.139, -1.349] NaN NaN NaN
(-1.349, -0.561] 2 2 2
(-0.561, 0.227] 5 5 5

In [ ]: